diff --git a/.gitattributes b/.gitattributes index 823e3e975a2..d6547212393 100644 --- a/.gitattributes +++ b/.gitattributes @@ -83,6 +83,7 @@ Include/opcode_ids.h generated Include/token.h generated Lib/_opcode_metadata.py generated Lib/keyword.py generated +Lib/idlelib/help.html generated Lib/test/certdata/*.pem generated Lib/test/certdata/*.0 generated Lib/test/levenshtein_examples.json generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 1086b426204..6acc156ebff 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -126,6 +126,9 @@ Doc/howto/clinic.rst @erlend-aasland @AA-Turner # C Analyser Tools/c-analyzer/ @ericsnowcurrently +# C API Documentation Checks +Tools/check-c-api-docs/ @ZeroIntensity + # Fuzzing Modules/_xxtestfuzz/ @ammaraskar diff --git a/.github/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst index 224061f2c5a..94b67ce3dbe 100644 --- a/.github/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -28,13 +28,12 @@ Please be aware that our workflow does deviate slightly from the typical GitHub project. Details on how to properly submit a pull request are covered in `Lifecycle of a Pull Request `_. We utilize various bots and status checks to help with this, so do follow the -comments they leave and their "Details" links, respectively. The key points of -our workflow that are not covered by a bot or status check are: +comments they leave and their "Details" links, respectively. -- All discussions that are not directly related to the code in the pull request - should happen on `GitHub Issues `_. -- Upon your first non-trivial pull request (which includes documentation changes), - feel free to add yourself to ``Misc/ACKS``. +The final key part of our workflow is that all discussions that are not +directly related to the code in the pull request should happen on +`GitHub Issues `__, generally in the +pull request's parent issue. Setting Expectations diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 75d174307ce..de6e8756b03 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -5,3 +5,6 @@ contact_links: - name: "Proposing new features" about: "Submit major feature proposal (e.g. syntax changes) to an ideas forum first." url: "https://discuss.python.org/c/ideas/6" + - name: "Python Install Manager issues" + about: "Report issues with the Python Install Manager (for Windows)" + url: "https://github.com/python/pymanager/issues" diff --git a/.github/dependabot.yml b/.github/dependabot.yml index c8a3165d690..7f3376f8ddb 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -12,6 +12,11 @@ updates: update-types: - "version-update:semver-minor" - "version-update:semver-patch" + cooldown: + # https://blog.yossarian.net/2025/11/21/We-should-all-be-using-dependency-cooldowns + # Cooldowns protect against supply chain attacks by avoiding the + # highest-risk window immediately after new releases. + default-days: 14 - package-ecosystem: "pip" directory: "/Tools/" schedule: @@ -19,3 +24,5 @@ updates: labels: - "skip issue" - "skip news" + cooldown: + default-days: 14 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6aa99928278..3d889fa128e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,20 +109,10 @@ jobs: python-version: '3.x' - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - # Include env.pythonLocation in key to avoid changes in environment when setup-python updates Python - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}-${{ env.pythonLocation }} - name: Install dependencies run: sudo ./.github/workflows/posix-deps-apt.sh - name: Add ccache to PATH run: echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Configure CPython run: | # Build Python with the libpython dynamic library @@ -152,6 +142,9 @@ jobs: - name: Check for unsupported C global variables if: github.event_name == 'pull_request' # $GITHUB_EVENT_NAME run: make check-c-globals + - name: Check for undocumented C APIs + run: make check-c-api-docs + build-windows: name: >- @@ -215,7 +208,6 @@ jobs: free-threading: true uses: ./.github/workflows/reusable-macos.yml with: - config_hash: ${{ needs.build-context.outputs.config-hash }} free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} @@ -247,7 +239,6 @@ jobs: bolt: true uses: ./.github/workflows/reusable-ubuntu.yml with: - config_hash: ${{ needs.build-context.outputs.config-hash }} bolt-optimizations: ${{ matrix.bolt }} free-threading: ${{ matrix.free-threading }} os: ${{ matrix.os }} @@ -278,11 +269,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -304,10 +290,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Configure CPython run: ./configure CFLAGS="-fdiagnostics-format=json" --config-cache --enable-slower-safety --with-pydebug --with-openssl="$OPENSSL_DIR" - name: Build CPython @@ -339,11 +321,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -370,10 +347,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Configure CPython run: | ./configure CFLAGS="-fdiagnostics-format=json" \ @@ -442,8 +415,6 @@ jobs: needs: build-context if: needs.build-context.outputs.run-tests == 'true' uses: ./.github/workflows/reusable-wasi.yml - with: - config_hash: ${{ needs.build-context.outputs.config-hash }} test-hypothesis: name: "Hypothesis tests on Ubuntu" @@ -479,10 +450,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: false - name: Setup directory envs for out-of-tree builds run: | echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" @@ -493,11 +460,6 @@ jobs: run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} run: | @@ -581,11 +543,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Install dependencies @@ -611,11 +568,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: Configure CPython run: ./configure --config-cache --with-address-sanitizer --without-pymalloc - name: Build CPython @@ -647,7 +599,6 @@ jobs: uses: ./.github/workflows/reusable-san.yml with: sanitizer: ${{ matrix.sanitizer }} - config_hash: ${{ needs.build-context.outputs.config-hash }} free-threading: ${{ matrix.free-threading }} cross-build-linux: @@ -662,11 +613,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }} - name: Register gcc problem matcher run: echo "::add-matcher::.github/problem-matchers/gcc.json" - name: Set build dir diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 151b17e8442..62325250bd3 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -68,7 +68,7 @@ jobs: - true - false llvm: - - 20 + - 21 include: - target: i686-pc-windows-msvc/msvc architecture: Win32 @@ -138,7 +138,7 @@ jobs: fail-fast: false matrix: llvm: - - 20 + - 21 steps: - uses: actions/checkout@v4 with: @@ -166,7 +166,7 @@ jobs: fail-fast: false matrix: llvm: - - 20 + - 21 steps: - uses: actions/checkout@v4 with: @@ -183,3 +183,27 @@ jobs: - name: Run tests without optimizations run: | PYTHON_UOPS_OPTIMIZE=0 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 + + tail-call-jit: + name: JIT with tail calling interpreter + needs: interpreter + runs-on: ubuntu-24.04 + timeout-minutes: 90 + strategy: + fail-fast: false + matrix: + llvm: + - 21 + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Build with JIT and tailcall + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + CC=clang-${{ matrix.llvm }} ./configure --enable-experimental-jit --with-tail-call-interp --with-pydebug + make all --jobs 4 diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index d2668ddcac1..66c7cc47de0 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -17,9 +17,6 @@ on: # yamllint disable-line rule:truthy # || 'falsy-branch' # }} # - config-hash: - description: Config hash value for use in cache keys - value: ${{ jobs.compute-changes.outputs.config-hash }} # str run-docs: description: Whether to build the docs value: ${{ jobs.compute-changes.outputs.run-docs }} # bool @@ -42,7 +39,6 @@ jobs: runs-on: ubuntu-latest timeout-minutes: 10 outputs: - config-hash: ${{ steps.config-hash.outputs.hash }} run-ci-fuzz: ${{ steps.changes.outputs.run-ci-fuzz }} run-docs: ${{ steps.changes.outputs.run-docs }} run-tests: ${{ steps.changes.outputs.run-tests }} @@ -100,8 +96,3 @@ jobs: GITHUB_EVENT_NAME: ${{ github.event_name }} CCF_TARGET_REF: ${{ github.base_ref || github.event.repository.default_branch }} CCF_HEAD_REF: ${{ github.event.pull_request.head.sha || github.sha }} - - - name: Compute hash for config cache key - id: config-hash - run: | - echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index 3d310ae695b..98d557ba1ea 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -3,9 +3,6 @@ name: Reusable macOS on: workflow_call: inputs: - config_hash: - required: true - type: string free-threading: required: false type: boolean @@ -36,11 +33,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} - name: Install Homebrew dependencies run: | brew install pkg-config openssl@3.0 xz gdbm tcl-tk@9 make diff --git a/.github/workflows/reusable-san.yml b/.github/workflows/reusable-san.yml index e6ff02e4838..c601d0b7338 100644 --- a/.github/workflows/reusable-san.yml +++ b/.github/workflows/reusable-san.yml @@ -6,9 +6,6 @@ on: sanitizer: required: true type: string - config_hash: - required: true - type: string free-threading: description: Whether to use free-threaded mode required: false @@ -34,11 +31,6 @@ jobs: persist-credentials: false - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.sanitizer }}-${{ inputs.config_hash }} - name: Install dependencies run: | sudo ./.github/workflows/posix-deps-apt.sh @@ -77,11 +69,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: Configure CPython run: >- ./configure diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 7f8b9fdf5d6..0c1ebe29ae3 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -3,9 +3,6 @@ name: Reusable Ubuntu on: workflow_call: inputs: - config_hash: - required: true - type: string bolt-optimizations: description: Whether to enable BOLT optimizations required: false @@ -64,11 +61,6 @@ jobs: - name: Add ccache to PATH run: | echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - - name: Configure ccache action - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: Setup directory envs for out-of-tree builds run: | echo "CPYTHON_RO_SRCDIR=$(realpath -m "${GITHUB_WORKSPACE}"/../cpython-ro-srcdir)" >> "$GITHUB_ENV" @@ -79,11 +71,6 @@ jobs: run: sudo mount --bind -o ro "$GITHUB_WORKSPACE" "$CPYTHON_RO_SRCDIR" - name: Runner image version run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: Restore config.cache - uses: actions/cache@v4 - with: - path: ${{ env.CPYTHON_BUILDDIR }}/config.cache - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ inputs.config_hash }} - name: Configure CPython out-of-tree working-directory: ${{ env.CPYTHON_BUILDDIR }} # `test_unpickle_module_race` writes to the source directory, which is diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 7c40e566b2a..91d76fd1b5f 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -2,10 +2,6 @@ name: Reusable WASI on: workflow_call: - inputs: - config_hash: - required: true - type: string env: FORCE_COLOR: 1 @@ -17,7 +13,7 @@ jobs: timeout-minutes: 60 env: WASMTIME_VERSION: 38.0.3 - WASI_SDK_VERSION: 25 + WASI_SDK_VERSION: 29 WASI_SDK_PATH: /opt/wasi-sdk CROSS_BUILD_PYTHON: cross-build/build CROSS_BUILD_WASI: cross-build/wasm32-wasip1 @@ -42,11 +38,6 @@ jobs: mkdir "${WASI_SDK_PATH}" && \ curl -s -S --location "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-${WASI_SDK_VERSION}/wasi-sdk-${WASI_SDK_VERSION}.0-arm64-linux.tar.gz" | \ tar --strip-components 1 --directory "${WASI_SDK_PATH}" --extract --gunzip - - name: "Configure ccache action" - uses: hendrikmuhs/ccache-action@v1.2 - with: - save: ${{ github.event_name == 'push' }} - max-size: "200M" - name: "Add ccache to PATH" run: echo "PATH=/usr/lib/ccache:$PATH" >> "$GITHUB_ENV" - name: "Install Python" @@ -55,29 +46,15 @@ jobs: python-version: '3.x' - name: "Runner image version" run: echo "IMAGE_OS_VERSION=${ImageOS}-${ImageVersion}" >> "$GITHUB_ENV" - - name: "Restore Python build config.cache" - uses: actions/cache@v4 - with: - path: ${{ env.CROSS_BUILD_PYTHON }}/config.cache - # Include env.pythonLocation in key to avoid changes in environment when setup-python updates Python. - # Include the hash of `Tools/wasm/wasi.py` as it may change the environment variables. - # (Make sure to keep the key in sync with the other config.cache step below.) - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} - name: "Configure build Python" - run: python3 Tools/wasm/wasi.py configure-build-python -- --config-cache --with-pydebug + run: python3 Tools/wasm/wasi configure-build-python -- --config-cache --with-pydebug - name: "Make build Python" - run: python3 Tools/wasm/wasi.py make-build-python - - name: "Restore host config.cache" - uses: actions/cache@v4 - with: - path: ${{ env.CROSS_BUILD_WASI }}/config.cache - # Should be kept in sync with the other config.cache step above. - key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ env.WASI_SDK_VERSION }}-${{ env.WASMTIME_VERSION }}-${{ inputs.config_hash }}-${{ hashFiles('Tools/wasm/wasi.py') }}-${{ env.pythonLocation }} + run: python3 Tools/wasm/wasi make-build-python - name: "Configure host" # `--with-pydebug` inferred from configure-build-python - run: python3 Tools/wasm/wasi.py configure-host -- --config-cache + run: python3 Tools/wasm/wasi configure-host -- --config-cache - name: "Make host" - run: python3 Tools/wasm/wasi.py make-host + run: python3 Tools/wasm/wasi make-host - name: "Display build info" run: make --directory "${CROSS_BUILD_WASI}" pythoninfo - name: "Test" diff --git a/.gitignore b/.gitignore index 2bf4925647d..4ea2fd96554 100644 --- a/.gitignore +++ b/.gitignore @@ -135,7 +135,6 @@ Tools/unicode/data/ /config.log /config.status /config.status.lineno -# hendrikmuhs/ccache-action@v1 /.ccache /cross-build/ /jit_stencils*.h diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0311f05279..c5767ee841e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,10 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.13.2 hooks: + - id: ruff-check + name: Run Ruff (lint) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple/ - id: ruff-check name: Run Ruff (lint) on Doc/ args: [--exit-non-zero-on-fix] @@ -30,6 +34,10 @@ repos: name: Run Ruff (lint) on Tools/wasm/ args: [--exit-non-zero-on-fix, --config=Tools/wasm/.ruff.toml] files: ^Tools/wasm/ + - id: ruff-format + name: Run Ruff (format) on Apple/ + args: [--exit-non-zero-on-fix, --config=Apple/.ruff.toml] + files: ^Apple - id: ruff-format name: Run Ruff (format) on Doc/ args: [--check] diff --git a/Android/android.py b/Android/android.py index 25bb4ca70b5..d1a10be776e 100755 --- a/Android/android.py +++ b/Android/android.py @@ -29,6 +29,7 @@ ANDROID_DIR.name == "Android" and (PYTHON_DIR / "pyconfig.h.in").exists() ) +ENV_SCRIPT = ANDROID_DIR / "android-env.sh" TESTBED_DIR = ANDROID_DIR / "testbed" CROSS_BUILD_DIR = PYTHON_DIR / "cross-build" @@ -129,12 +130,11 @@ def android_env(host): sysconfig_filename = next(sysconfig_files).name host = re.fullmatch(r"_sysconfigdata__android_(.+).py", sysconfig_filename)[1] - env_script = ANDROID_DIR / "android-env.sh" env_output = subprocess.run( f"set -eu; " f"HOST={host}; " f"PREFIX={prefix}; " - f". {env_script}; " + f". {ENV_SCRIPT}; " f"export", check=True, shell=True, capture_output=True, encoding='utf-8', ).stdout @@ -151,7 +151,7 @@ def android_env(host): env[key] = value if not env: - raise ValueError(f"Found no variables in {env_script.name} output:\n" + raise ValueError(f"Found no variables in {ENV_SCRIPT.name} output:\n" + env_output) return env @@ -281,15 +281,30 @@ def clean_all(context): def setup_ci(): - # https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/ - if "GITHUB_ACTIONS" in os.environ and platform.system() == "Linux": - run( - ["sudo", "tee", "/etc/udev/rules.d/99-kvm4all.rules"], - input='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"\n', - text=True, - ) - run(["sudo", "udevadm", "control", "--reload-rules"]) - run(["sudo", "udevadm", "trigger", "--name-match=kvm"]) + if "GITHUB_ACTIONS" in os.environ: + # Enable emulator hardware acceleration + # (https://github.blog/changelog/2024-04-02-github-actions-hardware-accelerated-android-virtualization-now-available/). + if platform.system() == "Linux": + run( + ["sudo", "tee", "/etc/udev/rules.d/99-kvm4all.rules"], + input='KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"\n', + text=True, + ) + run(["sudo", "udevadm", "control", "--reload-rules"]) + run(["sudo", "udevadm", "trigger", "--name-match=kvm"]) + + # Free up disk space by deleting unused versions of the NDK + # (https://github.com/freakboy3742/pyspamsum/pull/108). + for line in ENV_SCRIPT.read_text().splitlines(): + if match := re.fullmatch(r"ndk_version=(.+)", line): + ndk_version = match[1] + break + else: + raise ValueError(f"Failed to find NDK version in {ENV_SCRIPT.name}") + + for item in (android_home / "ndk").iterdir(): + if item.name[0].isdigit() and item.name != ndk_version: + delete_glob(item) def setup_sdk(): diff --git a/Android/testbed/app/build.gradle.kts b/Android/testbed/app/build.gradle.kts index 4de628a279c..14d43d8c4d5 100644 --- a/Android/testbed/app/build.gradle.kts +++ b/Android/testbed/app/build.gradle.kts @@ -79,7 +79,7 @@ android { val androidEnvFile = file("../../android-env.sh").absoluteFile namespace = "org.python.testbed" - compileSdk = 34 + compileSdk = 35 defaultConfig { applicationId = "org.python.testbed" @@ -92,7 +92,7 @@ android { } throw GradleException("Failed to find API level in $androidEnvFile") } - targetSdk = 34 + targetSdk = 35 versionCode = 1 versionName = "1.0" diff --git a/Apple/.ruff.toml b/Apple/.ruff.toml new file mode 100644 index 00000000000..4cdc39ebee4 --- /dev/null +++ b/Apple/.ruff.toml @@ -0,0 +1,22 @@ +extend = "../.ruff.toml" # Inherit the project-wide settings + +[format] +preview = true +docstring-code-format = true + +[lint] +select = [ + "C4", # flake8-comprehensions + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "ISC", # flake8-implicit-str-concat + "LOG", # flake8-logging + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "PYI", # flake8-pyi + "RUF100", # Ban unused `# noqa` comments + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] diff --git a/Apple/__main__.py b/Apple/__main__.py index 34744871f68..256966e76c2 100644 --- a/Apple/__main__.py +++ b/Apple/__main__.py @@ -46,13 +46,12 @@ import sys import sysconfig import time -from collections.abc import Sequence +from collections.abc import Callable, Sequence from contextlib import contextmanager from datetime import datetime, timezone from os.path import basename, relpath from pathlib import Path from subprocess import CalledProcessError -from typing import Callable EnvironmentT = dict[str, str] ArgsT = Sequence[str | Path] @@ -140,17 +139,15 @@ def print_env(env: EnvironmentT) -> None: def apple_env(host: str) -> EnvironmentT: """Construct an Apple development environment for the given host.""" env = { - "PATH": ":".join( - [ - str(PYTHON_DIR / "Apple/iOS/Resources/bin"), - str(subdir(host) / "prefix"), - "/usr/bin", - "/bin", - "/usr/sbin", - "/sbin", - "/Library/Apple/usr/bin", - ] - ), + "PATH": ":".join([ + str(PYTHON_DIR / "Apple/iOS/Resources/bin"), + str(subdir(host) / "prefix"), + "/usr/bin", + "/bin", + "/usr/sbin", + "/sbin", + "/Library/Apple/usr/bin", + ]), } return env @@ -196,14 +193,10 @@ def clean(context: argparse.Namespace, target: str = "all") -> None: paths.append(target) if target in {"all", "hosts", "test"}: - paths.extend( - [ - path.name - for path in CROSS_BUILD_DIR.glob( - f"{context.platform}-testbed.*" - ) - ] - ) + paths.extend([ + path.name + for path in CROSS_BUILD_DIR.glob(f"{context.platform}-testbed.*") + ]) for path in paths: delete_path(path) @@ -352,18 +345,16 @@ def download(url: str, target_dir: Path) -> Path: out_path = target_path / basename(url) if not Path(out_path).is_file(): - run( - [ - "curl", - "-Lf", - "--retry", - "5", - "--retry-all-errors", - "-o", - out_path, - url, - ] - ) + run([ + "curl", + "-Lf", + "--retry", + "5", + "--retry-all-errors", + "-o", + out_path, + url, + ]) else: print(f"Using cached version of {basename(url)}") return out_path @@ -468,8 +459,7 @@ def package_version(prefix_path: Path) -> str: def lib_platform_files(dirname, names): - """A file filter that ignores platform-specific files in the lib directory. - """ + """A file filter that ignores platform-specific files in lib.""" path = Path(dirname) if ( path.parts[-3] == "lib" @@ -478,7 +468,7 @@ def lib_platform_files(dirname, names): ): return names elif path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - ignored_names = set( + ignored_names = { name for name in names if ( @@ -486,7 +476,13 @@ def lib_platform_files(dirname, names): or name.startswith("_sysconfig_vars_") or name == "build-details.json" ) - ) + } + elif path.parts[-1] == "lib": + ignored_names = { + name + for name in names + if name.startswith("libpython") and name.endswith(".dylib") + } else: ignored_names = set() @@ -499,7 +495,9 @@ def lib_non_platform_files(dirname, names): """ path = Path(dirname) if path.parts[-2] == "lib" and path.parts[-1].startswith("python"): - return set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + return ( + set(names) - lib_platform_files(dirname, names) - {"lib-dynload"} + ) else: return set() @@ -507,14 +505,15 @@ def lib_non_platform_files(dirname, names): def create_xcframework(platform: str) -> str: """Build an XCframework from the component parts for the platform. - :return: The version number of the Python verion that was packaged. + :return: The version number of the Python version that was packaged. """ package_path = CROSS_BUILD_DIR / platform try: package_path.mkdir() except FileExistsError: raise RuntimeError( - f"{platform} XCframework already exists; do you need to run with --clean?" + f"{platform} XCframework already exists; do you need to run " + "with --clean?" ) from None frameworks = [] @@ -607,7 +606,7 @@ def create_xcframework(platform: str) -> str: print(f" - {slice_name} binaries") shutil.copytree(first_path / "bin", slice_path / "bin") - # Copy the include path (this will be a symlink to the framework headers) + # Copy the include path (a symlink to the framework headers) print(f" - {slice_name} include files") shutil.copytree( first_path / "include", @@ -621,6 +620,12 @@ def create_xcframework(platform: str) -> str: slice_framework / "Headers/pyconfig.h", ) + print(f" - {slice_name} shared library") + # Create a simlink for the fat library + shared_lib = slice_path / f"lib/libpython{version_tag}.dylib" + shared_lib.parent.mkdir() + shared_lib.symlink_to("../Python.framework/Python") + print(f" - {slice_name} architecture-specific files") for host_triple, multiarch in slice_parts.items(): print(f" - {multiarch} standard library") @@ -632,6 +637,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", package_path / "Python.xcframework/lib", ignore=lib_platform_files, + symlinks=True, ) has_common_stdlib = True @@ -639,6 +645,7 @@ def create_xcframework(platform: str) -> str: framework_path(host_triple, multiarch) / "lib", slice_path / f"lib-{arch}", ignore=lib_non_platform_files, + symlinks=True, ) # Copy the host's pyconfig.h to an architecture-specific name. @@ -659,7 +666,8 @@ def create_xcframework(platform: str) -> str: # statically link those libraries into a Framework, you become # responsible for providing a privacy manifest for that framework. xcprivacy_file = { - "OpenSSL": subdir(host_triple) / "prefix/share/OpenSSL.xcprivacy" + "OpenSSL": subdir(host_triple) + / "prefix/share/OpenSSL.xcprivacy" } print(f" - {multiarch} xcprivacy files") for module, lib in [ @@ -669,7 +677,8 @@ def create_xcframework(platform: str) -> str: shutil.copy( xcprivacy_file[lib], slice_path - / f"lib-{arch}/python{version_tag}/lib-dynload/{module}.xcprivacy", + / f"lib-{arch}/python{version_tag}" + / f"lib-dynload/{module}.xcprivacy", ) print(" - build tools") @@ -692,18 +701,16 @@ def package(context: argparse.Namespace) -> None: # Clone testbed print() - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - CROSS_BUILD_DIR / context.platform / "Python.xcframework", - CROSS_BUILD_DIR / context.platform / "testbed", - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + CROSS_BUILD_DIR / context.platform / "Python.xcframework", + CROSS_BUILD_DIR / context.platform / "testbed", + ]) # Build the final archive archive_name = ( @@ -757,7 +764,7 @@ def build(context: argparse.Namespace, host: str | None = None) -> None: package(context) -def test(context: argparse.Namespace, host: str | None = None) -> None: +def test(context: argparse.Namespace, host: str | None = None) -> None: # noqa: PT028 """The implementation of the "test" command.""" if host is None: host = context.host @@ -795,18 +802,16 @@ def test(context: argparse.Namespace, host: str | None = None) -> None: / f"Frameworks/{apple_multiarch(host)}" ) - run( - [ - sys.executable, - "Apple/testbed", - "clone", - "--platform", - context.platform, - "--framework", - framework_path, - testbed_dir, - ] - ) + run([ + sys.executable, + "Apple/testbed", + "clone", + "--platform", + context.platform, + "--framework", + framework_path, + testbed_dir, + ]) run( [ @@ -840,7 +845,7 @@ def apple_sim_host(platform_name: str) -> str: """Determine the native simulator target for this platform.""" for _, slice_parts in HOSTS[platform_name].items(): for host_triple in slice_parts: - parts = host_triple.split('-') + parts = host_triple.split("-") if parts[0] == platform.machine() and parts[-1] == "simulator": return host_triple @@ -968,20 +973,29 @@ def parse_args() -> argparse.Namespace: cmd.add_argument( "--simulator", help=( - "The name of the simulator to use (eg: 'iPhone 16e'). Defaults to " - "the most recently released 'entry level' iPhone device. Device " - "architecture and OS version can also be specified; e.g., " - "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would run on " - "an ARM64 iPhone 16 Pro simulator running iOS 26.0." + "The name of the simulator to use (eg: 'iPhone 16e'). " + "Defaults to the most recently released 'entry level' " + "iPhone device. Device architecture and OS version can also " + "be specified; e.g., " + "`--simulator 'iPhone 16 Pro,arch=arm64,OS=26.0'` would " + "run on an ARM64 iPhone 16 Pro simulator running iOS 26.0." ), ) group = cmd.add_mutually_exclusive_group() group.add_argument( - "--fast-ci", action="store_const", dest="ci_mode", const="fast", - help="Add test arguments for GitHub Actions") + "--fast-ci", + action="store_const", + dest="ci_mode", + const="fast", + help="Add test arguments for GitHub Actions", + ) group.add_argument( - "--slow-ci", action="store_const", dest="ci_mode", const="slow", - help="Add test arguments for buildbots") + "--slow-ci", + action="store_const", + dest="ci_mode", + const="slow", + help="Add test arguments for buildbots", + ) for subcommand in [configure_build, configure_host, build, ci]: subcommand.add_argument( diff --git a/Apple/testbed/Python.xcframework/build/utils.sh b/Apple/testbed/Python.xcframework/build/utils.sh index 961c46d014b..e7155d8b30e 100755 --- a/Apple/testbed/Python.xcframework/build/utils.sh +++ b/Apple/testbed/Python.xcframework/build/utils.sh @@ -46,7 +46,8 @@ install_stdlib() { rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" rsync -au "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib-$ARCHS/" "$CODESIGNING_FOLDER_PATH/python/lib/" else - rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" + # A single-arch framework will have a libpython symlink; that can't be included at runtime + rsync -au --delete "$PROJECT_DIR/$PYTHON_XCFRAMEWORK_PATH/$SLICE_FOLDER/lib/" "$CODESIGNING_FOLDER_PATH/python/lib/" --exclude 'libpython*.dylib' fi } diff --git a/Apple/testbed/__main__.py b/Apple/testbed/__main__.py index 42eb60a4c8d..0dd77ab8b82 100644 --- a/Apple/testbed/__main__.py +++ b/Apple/testbed/__main__.py @@ -2,6 +2,7 @@ import json import os import re +import shlex import shutil import subprocess import sys @@ -31,15 +32,15 @@ def select_simulator_device(platform): json_data = json.loads(raw_json) if platform == "iOS": - # Any iOS device will do; we'll look for "SE" devices - but the name isn't - # consistent over time. Older Xcode versions will use "iPhone SE (Nth - # generation)"; As of 2025, they've started using "iPhone 16e". + # Any iOS device will do; we'll look for "SE" devices - but the name + # isn't consistent over time. Older Xcode versions will use "iPhone SE + # (Nth generation)"; As of 2025, they've started using "iPhone 16e". # - # When Xcode is updated after a new release, new devices will be available - # and old ones will be dropped from the set available on the latest iOS - # version. Select the one with the highest minimum runtime version - this - # is an indicator of the "newest" released device, which should always be - # supported on the "most recent" iOS version. + # When Xcode is updated after a new release, new devices will be + # available and old ones will be dropped from the set available on the + # latest iOS version. Select the one with the highest minimum runtime + # version - this is an indicator of the "newest" released device, which + # should always be supported on the "most recent" iOS version. se_simulators = sorted( (devicetype["minRuntimeVersion"], devicetype["name"]) for devicetype in json_data["devicetypes"] @@ -252,7 +253,7 @@ def update_test_plan(testbed_path, platform, args): test_plan = json.load(f) test_plan["defaultOptions"]["commandLineArgumentEntries"] = [ - {"argument": arg} for arg in args + {"argument": shlex.quote(arg)} for arg in args ] with test_plan_path.open("w", encoding="utf-8") as f: @@ -294,7 +295,8 @@ def main(): parser = argparse.ArgumentParser( description=( - "Manages the process of testing an Apple Python project through Xcode." + "Manages the process of testing an Apple Python project " + "through Xcode." ), ) @@ -335,7 +337,10 @@ def main(): run = subcommands.add_parser( "run", - usage="%(prog)s [-h] [--simulator SIMULATOR] -- [ ...]", + usage=( + "%(prog)s [-h] [--simulator SIMULATOR] -- " + " [ ...]" + ), description=( "Run a testbed project. The arguments provided after `--` will be " "passed to the running iOS process as if they were arguments to " @@ -396,9 +401,9 @@ def main(): / "bin" ).is_dir(): print( - f"Testbed does not contain a compiled Python framework. Use " - f"`python {sys.argv[0]} clone ...` to create a runnable " - f"clone of this testbed." + "Testbed does not contain a compiled Python framework. " + f"Use `python {sys.argv[0]} clone ...` to create a " + "runnable clone of this testbed." ) sys.exit(20) @@ -410,7 +415,8 @@ def main(): ) else: print( - f"Must specify test arguments (e.g., {sys.argv[0]} run -- test)" + "Must specify test arguments " + f"(e.g., {sys.argv[0]} run -- test)" ) print() parser.print_help(sys.stderr) diff --git a/Doc/Makefile b/Doc/Makefile index f6f4c721080..f16d9cacb1b 100644 --- a/Doc/Makefile +++ b/Doc/Makefile @@ -241,7 +241,8 @@ dist-pdf: # as otherwise the full latexmk process is run twice. # ($$ is needed to escape the $; https://www.gnu.org/software/make/manual/make.html#Basics-of-Variable-References) -sed -i 's/: all-$$(FMT)/:/' build/latex/Makefile - (cd build/latex; $(MAKE) clean && $(MAKE) --jobs=$$((`nproc`+1)) --output-sync LATEXMKOPTS='-quiet' all-pdf && $(MAKE) FMT=pdf zip bz2) + if [ -n "$(filter output-sync,$(value .FEATURES))" ]; then OUTPUTSYNC=--output-sync; else OUTPUTSYNC=; fi && \ + (cd build/latex; $(MAKE) clean && $(MAKE) --jobs=$$((`getconf _NPROCESSORS_ONLN`+1)) $$OUTPUTSYNC LATEXMKOPTS='-quiet' all-pdf && $(MAKE) FMT=pdf zip bz2) cp build/latex/docs-pdf.zip dist/python-$(DISTVERSION)-docs-pdf-a4.zip cp build/latex/docs-pdf.tar.bz2 dist/python-$(DISTVERSION)-docs-pdf-a4.tar.bz2 @echo "Build finished and archived!" diff --git a/Doc/about.rst b/Doc/about.rst index 8f635d7f743..5c1b497ca6b 100644 --- a/Doc/about.rst +++ b/Doc/about.rst @@ -32,8 +32,9 @@ Contributors to the Python documentation ---------------------------------------- Many people have contributed to the Python language, the Python standard -library, and the Python documentation. See :source:`Misc/ACKS` in the Python -source distribution for a partial list of contributors. +library, and the Python documentation. See the `CPython +GitHub repository `__ +for a partial list of contributors. It is only with the input and contributions of the Python community that Python has such wonderful documentation -- Thank You! diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 59d913a0462..59044d2d88c 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -140,10 +140,6 @@ Allocating Objects on the Heap * :c:member:`~PyTypeObject.tp_alloc` -.. c:function:: void PyObject_Del(void *op) - - Same as :c:func:`PyObject_Free`. - .. c:var:: PyObject _Py_NoneStruct Object which is visible in Python as ``None``. This should only be accessed @@ -156,3 +152,35 @@ Allocating Objects on the Heap :ref:`moduleobjects` To allocate and create extension modules. + +Deprecated aliases +^^^^^^^^^^^^^^^^^^ + +These are :term:`soft deprecated` aliases to existing functions and macros. +They exist solely for backwards compatibility. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * Deprecated alias + * Function + * * .. c:macro:: PyObject_NEW(type, typeobj) + * :c:macro:`PyObject_New` + * * .. c:macro:: PyObject_NEW_VAR(type, typeobj, n) + * :c:macro:`PyObject_NewVar` + * * .. c:macro:: PyObject_INIT(op, typeobj) + * :c:func:`PyObject_Init` + * * .. c:macro:: PyObject_INIT_VAR(op, typeobj, n) + * :c:func:`PyObject_InitVar` + * * .. c:macro:: PyObject_MALLOC(n) + * :c:func:`PyObject_Malloc` + * * .. c:macro:: PyObject_REALLOC(p, n) + * :c:func:`PyObject_Realloc` + * * .. c:macro:: PyObject_FREE(p) + * :c:func:`PyObject_Free` + * * .. c:macro:: PyObject_DEL(p) + * :c:func:`PyObject_Free` + * * .. c:macro:: PyObject_Del(p) + * :c:func:`PyObject_Free` diff --git a/Doc/c-api/buffer.rst b/Doc/c-api/buffer.rst index d3081894ead..6bb72a2312b 100644 --- a/Doc/c-api/buffer.rst +++ b/Doc/c-api/buffer.rst @@ -261,6 +261,10 @@ readonly, format MUST be consistent for all consumers. For example, :c:expr:`PyBUF_SIMPLE | PyBUF_WRITABLE` can be used to request a simple writable buffer. + .. c:macro:: PyBUF_WRITEABLE + + This is a :term:`soft deprecated` alias to :c:macro:`PyBUF_WRITABLE`. + .. c:macro:: PyBUF_FORMAT Controls the :c:member:`~Py_buffer.format` field. If set, this field MUST diff --git a/Doc/c-api/bytes.rst b/Doc/c-api/bytes.rst index 865a9e5d2bf..82c25573683 100644 --- a/Doc/c-api/bytes.rst +++ b/Doc/c-api/bytes.rst @@ -228,6 +228,42 @@ called with a non-bytes parameter. The function is :term:`soft deprecated`, use the :c:type:`PyBytesWriter` API instead. + +.. c:function:: PyObject *PyBytes_Repr(PyObject *bytes, int smartquotes) + + Get the string representation of *bytes*. This function is currently used to + implement :meth:`!bytes.__repr__` in Python. + + This function does not do type checking; it is undefined behavior to pass + *bytes* as a non-bytes object or ``NULL``. + + If *smartquotes* is true, the representation will use a double-quoted string + instead of single-quoted string when single-quotes are present in *bytes*. + For example, the byte string ``'Python'`` would be represented as + ``b"'Python'"`` when *smartquotes* is true, or ``b'\'Python\''`` when it is + false. + + On success, this function returns a :term:`strong reference` to a + :class:`str` object containing the representation. On failure, this + returns ``NULL`` with an exception set. + + +.. c:function:: PyObject *PyBytes_DecodeEscape(const char *s, Py_ssize_t len, const char *errors, Py_ssize_t unicode, const char *recode_encoding) + + Unescape a backslash-escaped string *s*. *s* must not be ``NULL``. + *len* must be the size of *s*. + + *errors* must be one of ``"strict"``, ``"replace"``, or ``"ignore"``. If + *errors* is ``NULL``, then ``"strict"`` is used by default. + + On success, this function returns a :term:`strong reference` to a Python + :class:`bytes` object containing the unescaped string. On failure, this + function returns ``NULL`` with an exception set. + + .. versionchanged:: 3.9 + *unicode* and *recode_encoding* are now unused. + + .. _pybyteswriter: PyBytesWriter diff --git a/Doc/c-api/capsule.rst b/Doc/c-api/capsule.rst index 6da020efc7f..03a848d68ed 100644 --- a/Doc/c-api/capsule.rst +++ b/Doc/c-api/capsule.rst @@ -22,6 +22,12 @@ Refer to :ref:`using-capsules` for more information on using these objects. loaded modules. +.. c:var:: PyTypeObject PyCapsule_Type + + The type object corresponding to capsule objects. This is the same object + as :class:`types.CapsuleType` in the Python layer. + + .. c:type:: PyCapsule_Destructor The type of a destructor callback for a capsule. Defined as:: diff --git a/Doc/c-api/cell.rst b/Doc/c-api/cell.rst index 61eb994c370..2501ed9580d 100644 --- a/Doc/c-api/cell.rst +++ b/Doc/c-api/cell.rst @@ -7,7 +7,7 @@ Cell Objects "Cell" objects are used to implement variables referenced by multiple scopes. For each such variable, a cell object is created to store the value; the local -variables of each stack frame that references the value contains a reference to +variables of each stack frame that references the value contain a reference to the cells from outer scopes which also use that variable. When the value is accessed, the value contained in the cell is used instead of the cell object itself. This de-referencing of the cell object requires support from the diff --git a/Doc/c-api/code.rst b/Doc/c-api/code.rst index c9741b61254..45f5e83adc4 100644 --- a/Doc/c-api/code.rst +++ b/Doc/c-api/code.rst @@ -211,6 +211,17 @@ bound into a function. .. versionadded:: 3.12 +.. c:function:: PyObject *PyCode_Optimize(PyObject *code, PyObject *consts, PyObject *names, PyObject *lnotab_obj) + + This is a :term:`soft deprecated` function that does nothing. + + Prior to Python 3.10, this function would perform basic optimizations to a + code object. + + .. versionchanged:: 3.10 + This function now does nothing. + + .. _c_codeobject_flags: Code Object Flags diff --git a/Doc/c-api/codec.rst b/Doc/c-api/codec.rst index 08a99245ad6..35ee048bd5f 100644 --- a/Doc/c-api/codec.rst +++ b/Doc/c-api/codec.rst @@ -129,3 +129,13 @@ Registry API for Unicode encoding error handlers Replace the unicode encode error with ``\N{...}`` escapes. .. versionadded:: 3.5 + + +Codec utility variables +----------------------- + +.. c:var:: const char *Py_hexdigits + + A string constant containing the lowercase hexadecimal digits: ``"0123456789abcdef"``. + + .. versionadded:: 3.3 diff --git a/Doc/c-api/complex.rst b/Doc/c-api/complex.rst index d135637a741..629312bd771 100644 --- a/Doc/c-api/complex.rst +++ b/Doc/c-api/complex.rst @@ -82,7 +82,7 @@ Complex Number Objects .. c:type:: Py_complex - This C structure defines export format for a Python complex + This C structure defines an export format for a Python complex number object. .. c:member:: double real diff --git a/Doc/c-api/concrete.rst b/Doc/c-api/concrete.rst index 880f7b15ce6..1746fe95eaa 100644 --- a/Doc/c-api/concrete.rst +++ b/Doc/c-api/concrete.rst @@ -109,11 +109,20 @@ Other Objects descriptor.rst slice.rst memoryview.rst + picklebuffer.rst weakref.rst capsule.rst frame.rst gen.rst coro.rst contextvars.rst - datetime.rst typehints.rst + + +C API for extension modules +=========================== + +.. toctree:: + + curses.rst + datetime.rst diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index cc7a3d9d956..96078d22710 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -105,7 +105,7 @@ The following functions provide locale-independent string to number conversions. If ``s`` represents a value that is too large to store in a float (for example, ``"1e500"`` is such a string on many platforms) then - if ``overflow_exception`` is ``NULL`` return ``Py_INFINITY`` (with + if ``overflow_exception`` is ``NULL`` return :c:macro:`!INFINITY` (with an appropriate sign) and don't set any exception. Otherwise, ``overflow_exception`` must point to a Python exception object; raise that exception and return ``-1.0``. In both cases, set @@ -128,18 +128,28 @@ The following functions provide locale-independent string to number conversions. must be 0 and is ignored. The ``'r'`` format code specifies the standard :func:`repr` format. - *flags* can be zero or more of the values ``Py_DTSF_SIGN``, - ``Py_DTSF_ADD_DOT_0``, or ``Py_DTSF_ALT``, or-ed together: + *flags* can be zero or more of the following values or-ed together: - * ``Py_DTSF_SIGN`` means to always precede the returned string with a sign - character, even if *val* is non-negative. + .. c:macro:: Py_DTSF_SIGN - * ``Py_DTSF_ADD_DOT_0`` means to ensure that the returned string will not look - like an integer. + Always precede the returned string with a sign + character, even if *val* is non-negative. - * ``Py_DTSF_ALT`` means to apply "alternate" formatting rules. See the - documentation for the :c:func:`PyOS_snprintf` ``'#'`` specifier for - details. + .. c:macro:: Py_DTSF_ADD_DOT_0 + + Ensure that the returned string will not look like an integer. + + .. c:macro:: Py_DTSF_ALT + + Apply "alternate" formatting rules. + See the documentation for the :c:func:`PyOS_snprintf` ``'#'`` specifier for + details. + + .. c:macro:: Py_DTSF_NO_NEG_0 + + Negative zero is converted to positive zero. + + .. versionadded:: 3.11 If *ptype* is non-``NULL``, then the value it points to will be set to one of ``Py_DTST_FINITE``, ``Py_DTST_INFINITE``, or ``Py_DTST_NAN``, signifying that @@ -152,13 +162,85 @@ The following functions provide locale-independent string to number conversions. .. versionadded:: 3.1 -.. c:function:: int PyOS_stricmp(const char *s1, const char *s2) +.. c:function:: int PyOS_mystricmp(const char *str1, const char *str2) + int PyOS_mystrnicmp(const char *str1, const char *str2, Py_ssize_t size) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strcmp` except that it ignores the case. + Case insensitive comparison of strings. These functions work almost + identically to :c:func:`!strcmp` and :c:func:`!strncmp` (respectively), + except that they ignore the case of ASCII characters. + + Return ``0`` if the strings are equal, a negative value if *str1* sorts + lexicographically before *str2*, or a positive value if it sorts after. + + In the *str1* or *str2* arguments, a NUL byte marks the end of the string. + For :c:func:`!PyOS_mystrnicmp`, the *size* argument gives the maximum size + of the string, as if NUL was present at the index given by *size*. + + These functions do not use the locale. -.. c:function:: int PyOS_strnicmp(const char *s1, const char *s2, Py_ssize_t size) +.. c:function:: int PyOS_stricmp(const char *str1, const char *str2) + int PyOS_strnicmp(const char *str1, const char *str2, Py_ssize_t size) - Case insensitive comparison of strings. The function works almost - identically to :c:func:`!strncmp` except that it ignores the case. + Case insensitive comparison of strings. + + On Windows, these are aliases of :c:func:`!stricmp` and :c:func:`!strnicmp`, + respectively. + + On other platforms, they are aliases of :c:func:`PyOS_mystricmp` and + :c:func:`PyOS_mystrnicmp`, respectively. + + +Character classification and conversion +======================================= + +The following macros provide locale-independent (unlike the C standard library +``ctype.h``) character classification and conversion. +The argument must be a signed or unsigned :c:expr:`char`. + + +.. c:macro:: Py_ISALNUM(c) + + Return true if the character *c* is an alphanumeric character. + + +.. c:macro:: Py_ISALPHA(c) + + Return true if the character *c* is an alphabetic character (``a-z`` and ``A-Z``). + + +.. c:macro:: Py_ISDIGIT(c) + + Return true if the character *c* is a decimal digit (``0-9``). + + +.. c:macro:: Py_ISLOWER(c) + + Return true if the character *c* is a lowercase ASCII letter (``a-z``). + + +.. c:macro:: Py_ISUPPER(c) + + Return true if the character *c* is an uppercase ASCII letter (``A-Z``). + + +.. c:macro:: Py_ISSPACE(c) + + Return true if the character *c* is a whitespace character (space, tab, + carriage return, newline, vertical tab, or form feed). + + +.. c:macro:: Py_ISXDIGIT(c) + + Return true if the character *c* is a hexadecimal digit (``0-9``, ``a-f``, and + ``A-F``). + + +.. c:macro:: Py_TOLOWER(c) + + Return the lowercase equivalent of the character *c*. + + +.. c:macro:: Py_TOUPPER(c) + + Return the uppercase equivalent of the character *c*. diff --git a/Doc/c-api/curses.rst b/Doc/c-api/curses.rst new file mode 100644 index 00000000000..5a1697c43cc --- /dev/null +++ b/Doc/c-api/curses.rst @@ -0,0 +1,138 @@ +.. highlight:: c + +Curses C API +------------ + +:mod:`curses` exposes a small C interface for extension modules. +Consumers must include the header file :file:`py_curses.h` (which is not +included by default by :file:`Python.h`) and :c:func:`import_curses` must +be invoked, usually as part of the module initialisation function, to populate +:c:var:`PyCurses_API`. + +.. warning:: + + Neither the C API nor the pure Python :mod:`curses` module are compatible + with subinterpreters. + +.. c:macro:: import_curses() + + Import the curses C API. The macro does not need a semi-colon to be called. + + On success, populate the :c:var:`PyCurses_API` pointer. + + On failure, set :c:var:`PyCurses_API` to NULL and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: + + import_curses(); // semi-colon is optional but recommended + if (PyErr_Occurred()) { /* cleanup */ } + + +.. c:var:: void **PyCurses_API + + Dynamically allocated object containing the curses C API. + This variable is only available once :c:macro:`import_curses` succeeds. + + ``PyCurses_API[0]`` corresponds to :c:data:`PyCursesWindow_Type`. + + ``PyCurses_API[1]``, ``PyCurses_API[2]``, and ``PyCurses_API[3]`` + are pointers to predicate functions of type ``int (*)(void)``. + + When called, these predicates return whether :func:`curses.setupterm`, + :func:`curses.initscr`, and :func:`curses.start_color` have been called + respectively. + + See also the convenience macros :c:macro:`PyCursesSetupTermCalled`, + :c:macro:`PyCursesInitialised`, and :c:macro:`PyCursesInitialisedColor`. + + .. note:: + + The number of entries in this structure is subject to changes. + Consider using :c:macro:`PyCurses_API_pointers` to check if + new fields are available or not. + + +.. c:macro:: PyCurses_API_pointers + + The number of accessible fields (``4``) in :c:var:`PyCurses_API`. + This number is incremented whenever new fields are added. + + +.. c:var:: PyTypeObject PyCursesWindow_Type + + The :ref:`heap type ` corresponding to :class:`curses.window`. + + +.. c:function:: int PyCursesWindow_Check(PyObject *op) + + Return true if *op* is a :class:`curses.window` instance, false otherwise. + + +The following macros are convenience macros expanding into C statements. +In particular, they can only be used as ``macro;`` or ``macro``, but not +``macro()`` or ``macro();``. + +.. c:macro:: PyCursesSetupTermCalled + + Macro checking if :func:`curses.setupterm` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_setupterm_called = (predicate_t)PyCurses_API[1]; + if (!was_setupterm_called()) { + return NULL; + } + } + + +.. c:macro:: PyCursesInitialised + + Macro checking if :func:`curses.initscr` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_initscr_called = (predicate_t)PyCurses_API[2]; + if (!was_initscr_called()) { + return NULL; + } + } + + +.. c:macro:: PyCursesInitialisedColor + + Macro checking if :func:`curses.start_color` has been called. + + The macro expansion is roughly equivalent to: + + .. code-block:: + + { + typedef int (*predicate_t)(void); + predicate_t was_start_color_called = (predicate_t)PyCurses_API[3]; + if (!was_start_color_called()) { + return NULL; + } + } + + +Internal data +------------- + +The following objects are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyCurses_CAPSULE_NAME + + Name of the curses capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`import_curses` instead. + diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst index d2d4d5309c7..127d7c9c91a 100644 --- a/Doc/c-api/datetime.rst +++ b/Doc/c-api/datetime.rst @@ -8,11 +8,42 @@ DateTime Objects Various date and time objects are supplied by the :mod:`datetime` module. Before using any of these functions, the header file :file:`datetime.h` must be included in your source (note that this is not included by :file:`Python.h`), -and the macro :c:macro:`!PyDateTime_IMPORT` must be invoked, usually as part of +and the macro :c:macro:`PyDateTime_IMPORT` must be invoked, usually as part of the module initialisation function. The macro puts a pointer to a C structure -into a static variable, :c:data:`!PyDateTimeAPI`, that is used by the following +into a static variable, :c:data:`PyDateTimeAPI`, that is used by the following macros. +.. c:macro:: PyDateTime_IMPORT() + + Import the datetime C API. + + On success, populate the :c:var:`PyDateTimeAPI` pointer. + On failure, set :c:var:`PyDateTimeAPI` to ``NULL`` and set an exception. + The caller must check if an error occurred via :c:func:`PyErr_Occurred`: + + .. code-block:: + + PyDateTime_IMPORT; + if (PyErr_Occurred()) { /* cleanup */ } + + .. warning:: + + This is not compatible with subinterpreters. + +.. c:type:: PyDateTime_CAPI + + Structure containing the fields for the datetime C API. + + The fields of this structure are private and subject to change. + + Do not use this directly; prefer ``PyDateTime_*`` APIs instead. + +.. c:var:: PyDateTime_CAPI *PyDateTimeAPI + + Dynamically allocated object containing the datetime C API. + + This variable is only available once :c:macro:`PyDateTime_IMPORT` succeeds. + .. c:type:: PyDateTime_Date This subtype of :c:type:`PyObject` represents a Python date object. @@ -46,7 +77,7 @@ macros. .. c:var:: PyTypeObject PyDateTime_DeltaType - This instance of :c:type:`PyTypeObject` represents Python type for + This instance of :c:type:`PyTypeObject` represents the Python type for the difference between two datetime values; it is the same object as :class:`datetime.timedelta` in the Python layer. @@ -325,3 +356,16 @@ Macros for the convenience of modules implementing the DB API: Create and return a new :class:`datetime.date` object given an argument tuple suitable for passing to :meth:`datetime.date.fromtimestamp`. + + +Internal data +------------- + +The following symbols are exposed by the C API but should be considered +internal-only. + +.. c:macro:: PyDateTime_CAPSULE_NAME + + Name of the datetime capsule to pass to :c:func:`PyCapsule_Import`. + + Internal usage only. Use :c:macro:`PyDateTime_IMPORT` instead. diff --git a/Doc/c-api/descriptor.rst b/Doc/c-api/descriptor.rst index b32c113e5f0..313c534545a 100644 --- a/Doc/c-api/descriptor.rst +++ b/Doc/c-api/descriptor.rst @@ -21,20 +21,104 @@ found in the dictionary of type objects. .. c:function:: PyObject* PyDescr_NewMember(PyTypeObject *type, struct PyMemberDef *meth) +.. c:var:: PyTypeObject PyMemberDescr_Type + + The type object for member descriptor objects created from + :c:type:`PyMemberDef` structures. These descriptors expose fields of a + C struct as attributes on a type, and correspond + to :class:`types.MemberDescriptorType` objects in Python. + + + +.. c:var:: PyTypeObject PyGetSetDescr_Type + + The type object for get/set descriptor objects created from + :c:type:`PyGetSetDef` structures. These descriptors implement attributes + whose value is computed by C getter and setter functions, and are used + for many built-in type attributes. + + .. c:function:: PyObject* PyDescr_NewMethod(PyTypeObject *type, struct PyMethodDef *meth) +.. c:var:: PyTypeObject PyMethodDescr_Type + + The type object for method descriptor objects created from + :c:type:`PyMethodDef` structures. These descriptors expose C functions as + methods on a type, and correspond to :class:`types.MemberDescriptorType` + objects in Python. + + .. c:function:: PyObject* PyDescr_NewWrapper(PyTypeObject *type, struct wrapperbase *wrapper, void *wrapped) +.. c:var:: PyTypeObject PyWrapperDescr_Type + + The type object for wrapper descriptor objects created by + :c:func:`PyDescr_NewWrapper` and :c:func:`PyWrapper_New`. Wrapper + descriptors are used internally to expose special methods implemented + via wrapper structures, and appear in Python as + :class:`types.WrapperDescriptorType` objects. + + .. c:function:: PyObject* PyDescr_NewClassMethod(PyTypeObject *type, PyMethodDef *method) .. c:function:: int PyDescr_IsData(PyObject *descr) - Return non-zero if the descriptor objects *descr* describes a data attribute, or + Return non-zero if the descriptor object *descr* describes a data attribute, or ``0`` if it describes a method. *descr* must be a descriptor object; there is no error checking. .. c:function:: PyObject* PyWrapper_New(PyObject *, PyObject *) + + +Built-in descriptors +^^^^^^^^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PySuper_Type + + The type object for super objects. This is the same object as + :class:`super` in the Python layer. + + +.. c:var:: PyTypeObject PyClassMethod_Type + + The type of class method objects. This is the same object as + :class:`classmethod` in the Python layer. + + +.. c:var:: PyTypeObject PyClassMethodDescr_Type + + The type object for C-level class method descriptor objects. + This is the type of the descriptors created for :func:`classmethod` defined in + C extension types, and is the same object as :class:`classmethod` + in Python. + + +.. c:function:: PyObject *PyClassMethod_New(PyObject *callable) + + Create a new :class:`classmethod` object wrapping *callable*. + *callable* must be a callable object and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to a new class + method descriptor. On failure, this function returns ``NULL`` with an + exception set. + + +.. c:var:: PyTypeObject PyStaticMethod_Type + + The type of static method objects. This is the same object as + :class:`staticmethod` in the Python layer. + + +.. c:function:: PyObject *PyStaticMethod_New(PyObject *callable) + + Create a new :class:`staticmethod` object wrapping *callable*. + *callable* must be a callable object and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to a new static + method descriptor. On failure, this function returns ``NULL`` with an + exception set. + diff --git a/Doc/c-api/dict.rst b/Doc/c-api/dict.rst index 0abbd662dad..9c4428ced41 100644 --- a/Doc/c-api/dict.rst +++ b/Doc/c-api/dict.rst @@ -43,6 +43,17 @@ Dictionary Objects prevent modification of the dictionary for non-dynamic class types. +.. c:var:: PyTypeObject PyDictProxy_Type + + The type object for mapping proxy objects created by + :c:func:`PyDictProxy_New` and for the read-only ``__dict__`` attribute + of many built-in types. A :c:type:`PyDictProxy_Type` instance provides a + dynamic, read-only view of an underlying dictionary: changes to the + underlying dictionary are reflected in the proxy, but the proxy itself + does not support mutation operations. This corresponds to + :class:`types.MappingProxyType` in Python. + + .. c:function:: void PyDict_Clear(PyObject *p) Empty an existing dictionary of all key-value pairs. @@ -245,6 +256,11 @@ Dictionary Objects ``len(p)`` on a dictionary. +.. c:function:: Py_ssize_t PyDict_GET_SIZE(PyObject *p) + + Similar to :c:func:`PyDict_Size`, but without error checking. + + .. c:function:: int PyDict_Next(PyObject *p, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) Iterate over all key-value pairs in the dictionary *p*. The @@ -426,3 +442,138 @@ Dictionary Objects it before returning. .. versionadded:: 3.12 + + +Dictionary View Objects +^^^^^^^^^^^^^^^^^^^^^^^ + +.. c:function:: int PyDictViewSet_Check(PyObject *op) + + Return true if *op* is a view of a set inside a dictionary. This is currently + equivalent to :c:expr:`PyDictKeys_Check(op) || PyDictItems_Check(op)`. This + function always succeeds. + + +.. c:var:: PyTypeObject PyDictKeys_Type + + Type object for a view of dictionary keys. In Python, this is the type of + the object returned by :meth:`dict.keys`. + + +.. c:function:: int PyDictKeys_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary keys view. This function + always succeeds. + + +.. c:var:: PyTypeObject PyDictValues_Type + + Type object for a view of dictionary values. In Python, this is the type of + the object returned by :meth:`dict.values`. + + +.. c:function:: int PyDictValues_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary values view. This function + always succeeds. + + +.. c:var:: PyTypeObject PyDictItems_Type + + Type object for a view of dictionary items. In Python, this is the type of + the object returned by :meth:`dict.items`. + + +.. c:function:: int PyDictItems_Check(PyObject *op) + + Return true if *op* is an instance of a dictionary items view. This function + always succeeds. + + +Ordered Dictionaries +^^^^^^^^^^^^^^^^^^^^ + +Python's C API provides interface for :class:`collections.OrderedDict` from C. +Since Python 3.7, dictionaries are ordered by default, so there is usually +little need for these functions; prefer ``PyDict*`` where possible. + + +.. c:var:: PyTypeObject PyODict_Type + + Type object for ordered dictionaries. This is the same object as + :class:`collections.OrderedDict` in the Python layer. + + +.. c:function:: int PyODict_Check(PyObject *od) + + Return true if *od* is an ordered dictionary object or an instance of a + subtype of the :class:`~collections.OrderedDict` type. This function + always succeeds. + + +.. c:function:: int PyODict_CheckExact(PyObject *od) + + Return true if *od* is an ordered dictionary object, but not an instance of + a subtype of the :class:`~collections.OrderedDict` type. + This function always succeeds. + + +.. c:var:: PyTypeObject PyODictKeys_Type + + Analogous to :c:type:`PyDictKeys_Type` for ordered dictionaries. + + +.. c:var:: PyTypeObject PyODictValues_Type + + Analogous to :c:type:`PyDictValues_Type` for ordered dictionaries. + + +.. c:var:: PyTypeObject PyODictItems_Type + + Analogous to :c:type:`PyDictItems_Type` for ordered dictionaries. + + +.. c:function:: PyObject *PyODict_New(void) + + Return a new empty ordered dictionary, or ``NULL`` on failure. + + This is analogous to :c:func:`PyDict_New`. + + +.. c:function:: int PyODict_SetItem(PyObject *od, PyObject *key, PyObject *value) + + Insert *value* into the ordered dictionary *od* with a key of *key*. + Return ``0`` on success or ``-1`` with an exception set on failure. + + This is analogous to :c:func:`PyDict_SetItem`. + + +.. c:function:: int PyODict_DelItem(PyObject *od, PyObject *key) + + Remove the entry in the ordered dictionary *od* with key *key*. + Return ``0`` on success or ``-1`` with an exception set on failure. + + This is analogous to :c:func:`PyDict_DelItem`. + + +These are :term:`soft deprecated` aliases to ``PyDict`` APIs: + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * ``PyODict`` + * ``PyDict`` + * * .. c:macro:: PyODict_GetItem(od, key) + * :c:func:`PyDict_GetItem` + * * .. c:macro:: PyODict_GetItemWithError(od, key) + * :c:func:`PyDict_GetItemWithError` + * * .. c:macro:: PyODict_GetItemString(od, key) + * :c:func:`PyDict_GetItemString` + * * .. c:macro:: PyODict_Contains(od, key) + * :c:func:`PyDict_Contains` + * * .. c:macro:: PyODict_Size(od) + * :c:func:`PyDict_Size` + * * .. c:macro:: PyODict_SIZE(od) + * :c:func:`PyDict_GET_SIZE` diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 3ff4631a8e5..d7fe9e2c9ec 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -309,6 +309,14 @@ For convenience, some of these functions will always return a .. versionadded:: 3.4 +.. c:function:: void PyErr_RangedSyntaxLocationObject(PyObject *filename, int lineno, int col_offset, int end_lineno, int end_col_offset) + + Similar to :c:func:`PyErr_SyntaxLocationObject`, but also sets the + *end_lineno* and *end_col_offset* information for the current exception. + + .. versionadded:: 3.10 + + .. c:function:: void PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset) Like :c:func:`PyErr_SyntaxLocationObject`, but *filename* is a byte string @@ -331,6 +339,23 @@ For convenience, some of these functions will always return a use. +.. c:function:: PyObject *PyErr_ProgramTextObject(PyObject *filename, int lineno) + + Get the source line in *filename* at line *lineno*. *filename* should be a + Python :class:`str` object. + + On success, this function returns a Python string object with the found line. + On failure, this function returns ``NULL`` without an exception set. + + +.. c:function:: PyObject *PyErr_ProgramText(const char *filename, int lineno) + + Similar to :c:func:`PyErr_ProgramTextObject`, but *filename* is a + :c:expr:`const char *`, which is decoded with the + :term:`filesystem encoding and error handler`, instead of a + Python object reference. + + Issuing warnings ================ @@ -394,6 +419,15 @@ an error value). .. versionadded:: 3.2 +.. c:function:: int PyErr_WarnExplicitFormat(PyObject *category, const char *filename, int lineno, const char *module, PyObject *registry, const char *format, ...) + + Similar to :c:func:`PyErr_WarnExplicit`, but uses + :c:func:`PyUnicode_FromFormat` to format the warning message. *format* is + an ASCII-encoded string. + + .. versionadded:: 3.2 + + .. c:function:: int PyErr_ResourceWarning(PyObject *source, Py_ssize_t stack_level, const char *format, ...) Function similar to :c:func:`PyErr_WarnFormat`, but *category* is @@ -762,6 +796,17 @@ Exception Classes Exception Objects ================= +.. c:function:: int PyExceptionInstance_Check(PyObject *op) + + Return true if *op* is an instance of :class:`BaseException`, false + otherwise. This function always succeeds. + + +.. c:macro:: PyExceptionInstance_Class(op) + + Equivalent to :c:func:`Py_TYPE(op) `. + + .. c:function:: PyObject* PyException_GetTraceback(PyObject *ex) Return the traceback associated with the exception as a new reference, as @@ -939,6 +984,9 @@ because the :ref:`call protocol ` takes care of recursion handling. be concatenated to the :exc:`RecursionError` message caused by the recursion depth limit. + .. seealso:: + The :c:func:`PyUnstable_ThreadState_SetStackProtection` function. + .. versionchanged:: 3.9 This function is now also available in the :ref:`limited API `. @@ -979,6 +1027,27 @@ these are the C equivalent to :func:`reprlib.recursive_repr`. Ends a :c:func:`Py_ReprEnter`. Must be called once for each invocation of :c:func:`Py_ReprEnter` that returns zero. +.. c:function:: int Py_GetRecursionLimit(void) + + Get the recursion limit for the current interpreter. It can be set with + :c:func:`Py_SetRecursionLimit`. The recursion limit prevents the + Python interpreter stack from growing infinitely. + + This function cannot fail, and the caller must hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`sys.getrecursionlimit` + +.. c:function:: void Py_SetRecursionLimit(int new_limit) + + Set the recursion limit for the current interpreter. + + This function cannot fail, and the caller must hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`sys.setrecursionlimit` .. _standardexceptions: @@ -1207,3 +1276,37 @@ Warning types .. versionadded:: 3.10 :c:data:`PyExc_EncodingWarning`. + + +Tracebacks +========== + +.. c:var:: PyTypeObject PyTraceBack_Type + + Type object for traceback objects. This is available as + :class:`types.TracebackType` in the Python layer. + + +.. c:function:: int PyTraceBack_Check(PyObject *op) + + Return true if *op* is a traceback object, false otherwise. This function + does not account for subtypes. + + +.. c:function:: int PyTraceBack_Here(PyFrameObject *f) + + Replace the :attr:`~BaseException.__traceback__` attribute on the current + exception with a new traceback prepending *f* to the existing chain. + + Calling this function without an exception set is undefined behavior. + + This function returns ``0`` on success, and returns ``-1`` with an + exception set on failure. + + +.. c:function:: int PyTraceBack_Print(PyObject *tb, PyObject *f) + + Write the traceback *tb* into the file *f*. + + This function returns ``0`` on success, and returns ``-1`` with an + exception set on failure. diff --git a/Doc/c-api/extension-modules.rst b/Doc/c-api/extension-modules.rst index 3d331e6ec12..0ce173b4bfe 100644 --- a/Doc/c-api/extension-modules.rst +++ b/Doc/c-api/extension-modules.rst @@ -8,7 +8,8 @@ Defining extension modules A C extension for CPython is a shared library (for example, a ``.so`` file on Linux, ``.pyd`` DLL on Windows), which is loadable into the Python process (for example, it is compiled with compatible compiler settings), and which -exports an :ref:`initialization function `. +exports an :dfn:`export hook` function (or an +old-style :ref:`initialization function `). To be importable by default (that is, by :py:class:`importlib.machinery.ExtensionFileLoader`), @@ -23,25 +24,127 @@ and must be named after the module name plus an extension listed in One suitable tool is Setuptools, whose documentation can be found at https://setuptools.pypa.io/en/latest/setuptools.html. -Normally, the initialization function returns a module definition initialized -using :c:func:`PyModuleDef_Init`. -This allows splitting the creation process into several phases: +.. _extension-export-hook: +Extension export hook +..................... + +.. versionadded:: next + + Support for the :samp:`PyModExport_{}` export hook was added in Python + 3.15. The older way of defining modules is still available: consult either + the :ref:`extension-pyinit` section or earlier versions of this + documentation if you plan to support earlier Python versions. + +The export hook must be an exported function with the following signature: + +.. c:function:: PyModuleDef_Slot *PyModExport_modulename(void) + +For modules with ASCII-only names, the :ref:`export hook ` +must be named :samp:`PyModExport_{}`, +with ```` replaced by the module's name. + +For non-ASCII module names, the export hook must instead be named +:samp:`PyModExportU_{}` (note the ``U``), with ```` encoded using +Python's *punycode* encoding with hyphens replaced by underscores. In Python: + +.. code-block:: python + + def hook_name(name): + try: + suffix = b'_' + name.encode('ascii') + except UnicodeEncodeError: + suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') + return b'PyModExport' + suffix + +The export hook returns an array of :c:type:`PyModuleDef_Slot` entries, +terminated by an entry with a slot ID of ``0``. +These slots describe how the module should be created and initialized. + +This array must remain valid and constant until interpreter shutdown. +Typically, it should use ``static`` storage. +Prefer using the :c:macro:`Py_mod_create` and :c:macro:`Py_mod_exec` slots +for any dynamic behavior. + +The export hook may return ``NULL`` with an exception set to signal failure. + +It is recommended to define the export hook function using a helper macro: + +.. c:macro:: PyMODEXPORT_FUNC + + Declare an extension module export hook. + This macro: + + * specifies the :c:expr:`PyModuleDef_Slot*` return type, + * adds any special linkage declarations required by the platform, and + * for C++, declares the function as ``extern "C"``. + +For example, a module called ``spam`` would be defined like this:: + + PyABIInfo_VAR(abi_info); + + static PyModuleDef_Slot spam_slots[] = { + {Py_mod_abi, &abi_info}, + {Py_mod_name, "spam"}, + {Py_mod_init, spam_init_function}, + ... + {0, NULL}, + }; + + PyMODEXPORT_FUNC + PyModExport_spam(void) + { + return spam_slots; + } + +The export hook is typically the only non-\ ``static`` +item defined in the module's C source. + +The hook should be kept short -- ideally, one line as above. +If you do need to use Python C API in this function, it is recommended to call +``PyABIInfo_Check(&abi_info, "modulename")`` first to raise an exception, +rather than crash, in common cases of ABI mismatch. + + +.. note:: + + It is possible to export multiple modules from a single shared library by + defining multiple export hooks. + However, importing them requires a custom importer or suitably named + copies/links of the extension file, because Python's import machinery only + finds the function corresponding to the filename. + See the `Multiple modules in one library `__ + section in :pep:`489` for details. + + +.. _multi-phase-initialization: + +Multi-phase initialization +.......................... + +The process of creating an extension module follows several phases: + +- Python finds and calls the export hook to get information on how to + create the module. - Before any substantial code is executed, Python can determine which capabilities the module supports, and it can adjust the environment or refuse loading an incompatible extension. -- By default, Python itself creates the module object -- that is, it does - the equivalent of :py:meth:`object.__new__` for classes. - It also sets initial attributes like :attr:`~module.__package__` and - :attr:`~module.__loader__`. -- Afterwards, the module object is initialized using extension-specific - code -- the equivalent of :py:meth:`~object.__init__` on classes. + Slots like :c:data:`Py_mod_abi`, :c:data:`Py_mod_gil` and + :c:data:`Py_mod_multiple_interpreters` influence this step. +- By default, Python itself then creates the module object -- that is, it does + the equivalent of calling :py:meth:`~object.__new__` when creating an object. + This step can be overridden using the :c:data:`Py_mod_create` slot. +- Python sets initial module attributes like :attr:`~module.__package__` and + :attr:`~module.__loader__`, and inserts the module object into + :py:attr:`sys.modules`. +- Afterwards, the module object is initialized in an extension-specific way + -- the equivalent of :py:meth:`~object.__init__` when creating an object, + or of executing top-level code in a Python-language module. + The behavior is specified using the :c:data:`Py_mod_exec` slot. This is called *multi-phase initialization* to distinguish it from the legacy -(but still supported) *single-phase initialization* scheme, -where the initialization function returns a fully constructed module. -See the :ref:`single-phase-initialization section below ` -for details. +(but still supported) :ref:`single-phase initialization `, +where an initialization function returns a fully constructed module. .. versionchanged:: 3.5 @@ -53,7 +156,7 @@ Multiple module instances By default, extension modules are not singletons. For example, if the :py:attr:`sys.modules` entry is removed and the module -is re-imported, a new module object is created, and typically populated with +is re-imported, a new module object is created and, typically, populated with fresh method and type objects. The old module is subject to normal garbage collection. This mirrors the behavior of pure-Python modules. @@ -83,36 +186,34 @@ A module may also be limited to the main interpreter using the :c:data:`Py_mod_multiple_interpreters` slot. -.. _extension-export-hook: +.. _extension-pyinit: -Initialization function -....................... +``PyInit`` function +................... -The initialization function defined by an extension module has the -following signature: +.. deprecated:: next + + This functionality is :term:`soft deprecated`. + It will not get new features, but there are no plans to remove it. + +Instead of :c:func:`PyModExport_modulename`, an extension module can define +an older-style :dfn:`initialization function` with the signature: .. c:function:: PyObject* PyInit_modulename(void) Its name should be :samp:`PyInit_{}`, with ```` replaced by the name of the module. +For non-ASCII module names, use :samp:`PyInitU_{}` instead, with +```` encoded in the same way as for the +:ref:`export hook ` (that is, using Punycode +with underscores). -For modules with ASCII-only names, the function must instead be named -:samp:`PyInit_{}`, with ```` replaced by the name of the module. -When using :ref:`multi-phase-initialization`, non-ASCII module names -are allowed. In this case, the initialization function name is -:samp:`PyInitU_{}`, with ```` encoded using Python's -*punycode* encoding with hyphens replaced by underscores. In Python: +If a module exports both :samp:`PyInit_{}` and +:samp:`PyModExport_{}`, the :samp:`PyInit_{}` function +is ignored. -.. code-block:: python - - def initfunc_name(name): - try: - suffix = b'_' + name.encode('ascii') - except UnicodeEncodeError: - suffix = b'U_' + name.encode('punycode').replace(b'-', b'_') - return b'PyInit' + suffix - -It is recommended to define the initialization function using a helper macro: +Like with :c:macro:`PyMODEXPORT_FUNC`, it is recommended to define the +initialization function using a helper macro: .. c:macro:: PyMODINIT_FUNC @@ -123,6 +224,34 @@ It is recommended to define the initialization function using a helper macro: * adds any special linkage declarations required by the platform, and * for C++, declares the function as ``extern "C"``. + +Normally, the initialization function (``PyInit_modulename``) returns +a :c:type:`PyModuleDef` instance with non-``NULL`` +:c:member:`~PyModuleDef.m_slots`. This allows Python to use +:ref:`multi-phase initialization `. + +Before it is returned, the ``PyModuleDef`` instance must be initialized +using the following function: + +.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) + + Ensure a module definition is a properly initialized Python object that + correctly reports its type and a reference count. + + Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. + + Calling this function is required before returning a :c:type:`PyModuleDef` + from a module initialization function. + It should not be used in other contexts. + + Note that Python assumes that ``PyModuleDef`` structures are statically + allocated. + This function may return either a new reference or a borrowed one; + this reference must not be released. + + .. versionadded:: 3.5 + + For example, a module called ``spam`` would be defined like this:: static struct PyModuleDef spam_module = { @@ -137,59 +266,23 @@ For example, a module called ``spam`` would be defined like this:: return PyModuleDef_Init(&spam_module); } -It is possible to export multiple modules from a single shared library by -defining multiple initialization functions. However, importing them requires -using symbolic links or a custom importer, because by default only the -function corresponding to the filename is found. -See the `Multiple modules in one library `__ -section in :pep:`489` for details. - -The initialization function is typically the only non-\ ``static`` -item defined in the module's C source. - - -.. _multi-phase-initialization: - -Multi-phase initialization -.......................... - -Normally, the :ref:`initialization function ` -(``PyInit_modulename``) returns a :c:type:`PyModuleDef` instance with -non-``NULL`` :c:member:`~PyModuleDef.m_slots`. -Before it is returned, the ``PyModuleDef`` instance must be initialized -using the following function: - - -.. c:function:: PyObject* PyModuleDef_Init(PyModuleDef *def) - - Ensure a module definition is a properly initialized Python object that - correctly reports its type and a reference count. - - Return *def* cast to ``PyObject*``, or ``NULL`` if an error occurred. - - Calling this function is required for :ref:`multi-phase-initialization`. - It should not be used in other contexts. - - Note that Python assumes that ``PyModuleDef`` structures are statically - allocated. - This function may return either a new reference or a borrowed one; - this reference must not be released. - - .. versionadded:: 3.5 - .. _single-phase-initialization: Legacy single-phase initialization -.................................. +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. attention:: - Single-phase initialization is a legacy mechanism to initialize extension +.. deprecated:: next + + Single-phase initialization is :term:`soft deprecated`. + It is a legacy mechanism to initialize extension modules, with known drawbacks and design flaws. Extension module authors are encouraged to use multi-phase initialization instead. -In single-phase initialization, the -:ref:`initialization function ` (``PyInit_modulename``) + However, there are no plans to remove support for it. + +In single-phase initialization, the old-style +:ref:`initializaton function ` (``PyInit_modulename``) should create, populate and return a module object. This is typically done using :c:func:`PyModule_Create` and functions like :c:func:`PyModule_AddObjectRef`. @@ -242,6 +335,8 @@ in the following ways: * Single-phase modules support module lookup functions like :c:func:`PyState_FindModule`. +* The module's :c:member:`PyModuleDef.m_slots` must be NULL. + .. [#testsinglephase] ``_testsinglephase`` is an internal module used in CPython's self-test suite; your installation may or may not include it. diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index e9019a0d500..9d01254ddb2 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -93,6 +93,29 @@ the :mod:`io` APIs instead. .. versionadded:: 3.8 +.. c:function:: PyObject *PyFile_OpenCodeObject(PyObject *path) + + Open *path* with the mode ``'rb'``. *path* must be a Python :class:`str` + object. The behavior of this function may be overridden by + :c:func:`PyFile_SetOpenCodeHook` to allow for some preprocessing of the + text. + + This is analogous to :func:`io.open_code` in Python. + + On success, this function returns a :term:`strong reference` to a Python + file object. On failure, this function returns ``NULL`` with an exception + set. + + .. versionadded:: 3.8 + + +.. c:function:: PyObject *PyFile_OpenCode(const char *path) + + Similar to :c:func:`PyFile_OpenCodeObject`, but *path* is a + UTF-8 encoded :c:expr:`const char*`. + + .. versionadded:: 3.8 + .. c:function:: int PyFile_WriteObject(PyObject *obj, PyObject *p, int flags) diff --git a/Doc/c-api/float.rst b/Doc/c-api/float.rst index 489676caa3a..b0d440580b9 100644 --- a/Doc/c-api/float.rst +++ b/Doc/c-api/float.rst @@ -78,6 +78,111 @@ Floating-Point Objects Return the minimum normalized positive float *DBL_MIN* as C :c:expr:`double`. +.. c:macro:: Py_INFINITY + + This macro expands a to constant expression of type :c:expr:`double`, that + represents the positive infinity. + + It is equivalent to the :c:macro:`!INFINITY` macro from the C11 standard + ```` header. + + .. deprecated:: 3.15 + The macro is :term:`soft deprecated`. + + +.. c:macro:: Py_NAN + + This macro expands a to constant expression of type :c:expr:`double`, that + represents a quiet not-a-number (qNaN) value. + + On most platforms, this is equivalent to the :c:macro:`!NAN` macro from + the C11 standard ```` header. + + +.. c:macro:: Py_HUGE_VAL + + Equivalent to :c:macro:`!INFINITY`. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. + + +.. c:macro:: Py_MATH_E + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.e` constant. + + +.. c:macro:: Py_MATH_El + + High precision (long double) definition of :data:`~math.e` constant. + + .. deprecated-removed:: 3.15 3.20 + + +.. c:macro:: Py_MATH_PI + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.pi` constant. + + +.. c:macro:: Py_MATH_PIl + + High precision (long double) definition of :data:`~math.pi` constant. + + .. deprecated-removed:: 3.15 3.20 + + +.. c:macro:: Py_MATH_TAU + + The definition (accurate for a :c:expr:`double` type) of the :data:`math.tau` constant. + + .. versionadded:: 3.6 + + +.. c:macro:: Py_RETURN_NAN + + Return :data:`math.nan` from a function. + + On most platforms, this is equivalent to ``return PyFloat_FromDouble(NAN)``. + + +.. c:macro:: Py_RETURN_INF(sign) + + Return :data:`math.inf` or :data:`-math.inf ` from a function, + depending on the sign of *sign*. + + On most platforms, this is equivalent to the following:: + + return PyFloat_FromDouble(copysign(INFINITY, sign)); + + +.. c:macro:: Py_IS_FINITE(X) + + Return ``1`` if the given floating-point number *X* is finite, + that is, it is normal, subnormal or zero, but not infinite or NaN. + Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isfinite` instead. + + +.. c:macro:: Py_IS_INFINITY(X) + + Return ``1`` if the given floating-point number *X* is positive or negative + infinity. Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isinf` instead. + + +.. c:macro:: Py_IS_NAN(X) + + Return ``1`` if the given floating-point number *X* is a not-a-number (NaN) + value. Return ``0`` otherwise. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. Use :c:macro:`!isnan` instead. + + Pack and Unpack functions ------------------------- @@ -96,8 +201,8 @@ NaNs (if such things exist on the platform) isn't handled correctly, and attempting to unpack a bytes string containing an IEEE INF or NaN will raise an exception. -Note that NaNs type may not be preserved on IEEE platforms (silent NaN become -quiet), for example on x86 systems in 32-bit mode. +Note that NaNs type may not be preserved on IEEE platforms (signaling NaN become +quiet NaN), for example on x86 systems in 32-bit mode. On non-IEEE platforms with more precision, or larger dynamic range, than IEEE 754 supports, not all values can be packed; on non-IEEE platforms with less diff --git a/Doc/c-api/frame.rst b/Doc/c-api/frame.rst index 1a52e146a69..fb17cf7f1da 100644 --- a/Doc/c-api/frame.rst +++ b/Doc/c-api/frame.rst @@ -29,6 +29,12 @@ See also :ref:`Reflection `. Previously, this type was only available after including ````. +.. c:function:: PyFrameObject *PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyObject *locals) + + Create a new frame object. This function returns a :term:`strong reference` + to the new frame object on success, and returns ``NULL`` with an exception + set on failure. + .. c:function:: int PyFrame_Check(PyObject *obj) Return non-zero if *obj* is a frame object. @@ -161,6 +167,57 @@ See :pep:`667` for more information. Return non-zero if *obj* is a frame :func:`locals` proxy. + +Legacy Local Variable APIs +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These APIs are :term:`soft deprecated`. As of Python 3.13, they do nothing. +They exist solely for backwards compatibility. + + +.. c:function:: void PyFrame_LocalsToFast(PyFrameObject *f, int clear) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function would copy the :attr:`~frame.f_locals` + attribute of *f* to the internal "fast" array of local variables, allowing + changes in frame objects to be visible to the interpreter. If *clear* was + true, this function would process variables that were unset in the locals + dictionary. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. c:function:: void PyFrame_FastToLocals(PyFrameObject *f) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function would copy the internal "fast" array + of local variables (which is used by the interpreter) to the + :attr:`~frame.f_locals` attribute of *f*, allowing changes in local + variables to be visible to frame objects. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. c:function:: int PyFrame_FastToLocalsWithError(PyFrameObject *f) + + This function is :term:`soft deprecated` and does nothing. + + Prior to Python 3.13, this function was similar to + :c:func:`PyFrame_FastToLocals`, but would return ``0`` on success, and + ``-1`` with an exception set on failure. + + .. versionchanged:: 3.13 + This function now does nothing. + + +.. seealso:: + :pep:`667` + + Internal Frames ^^^^^^^^^^^^^^^ diff --git a/Doc/c-api/function.rst b/Doc/c-api/function.rst index 764b2ac610b..609b5e885b6 100644 --- a/Doc/c-api/function.rst +++ b/Doc/c-api/function.rst @@ -102,6 +102,15 @@ There are a few functions specific to Python functions. dictionary of arguments or ``NULL``. +.. c:function:: int PyFunction_SetKwDefaults(PyObject *op, PyObject *defaults) + + Set the keyword-only argument default values of the function object *op*. + *defaults* must be a dictionary of keyword-only arguments or ``Py_None``. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + .. c:function:: PyObject* PyFunction_GetClosure(PyObject *op) Return the closure associated with the function object *op*. This can be ``NULL`` @@ -200,7 +209,7 @@ There are a few functions specific to Python functions. runtime behavior depending on optimization decisions, it does not change the semantics of the Python code being executed. - If *event* is ``PyFunction_EVENT_DESTROY``, Taking a reference in the + If *event* is ``PyFunction_EVENT_DESTROY``, taking a reference in the callback to the about-to-be-destroyed function will resurrect it, preventing it from being freed at this time. When the resurrected object is destroyed later, any watcher callbacks active at that time will be called again. diff --git a/Doc/c-api/gen.rst b/Doc/c-api/gen.rst index 0eb5922f6da..44f3bdbf959 100644 --- a/Doc/c-api/gen.rst +++ b/Doc/c-api/gen.rst @@ -44,3 +44,41 @@ than explicitly calling :c:func:`PyGen_New` or :c:func:`PyGen_NewWithQualName`. with ``__name__`` and ``__qualname__`` set to *name* and *qualname*. A reference to *frame* is stolen by this function. The *frame* argument must not be ``NULL``. + +.. c:function:: PyCodeObject* PyGen_GetCode(PyGenObject *gen) + + Return a new :term:`strong reference` to the code object wrapped by *gen*. + This function always succeeds. + + +Asynchronous Generator Objects +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. seealso:: + :pep:`525` + +.. c:var:: PyTypeObject PyAsyncGen_Type + + The type object corresponding to asynchronous generator objects. This is + available as :class:`types.AsyncGeneratorType` in the Python layer. + + .. versionadded:: 3.6 + +.. c:function:: PyObject *PyAsyncGen_New(PyFrameObject *frame, PyObject *name, PyObject *qualname) + + Create a new asynchronous generator wrapping *frame*, with ``__name__`` and + ``__qualname__`` set to *name* and *qualname*. *frame* is stolen by this + function and must not be ``NULL``. + + On success, this function returns a :term:`strong reference` to the + new asynchronous generator. On failure, this function returns ``NULL`` + with an exception set. + + .. versionadded:: 3.6 + +.. c:function:: int PyAsyncGen_CheckExact(PyObject *op) + + Return true if *op* is an asynchronous generator object, false otherwise. + This function always succeeds. + + .. versionadded:: 3.6 diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index b5fe93573a1..1ad712b0ce4 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -11,42 +11,98 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. .. versionadded:: 3.2 + +.. c:macro:: Py_HASH_ALGORITHM + + A numerical value indicating the algorithm for hashing of :class:`str`, + :class:`bytes`, and :class:`memoryview`. + + The algorithm name is exposed by :data:`sys.hash_info.algorithm`. + + .. versionadded:: 3.4 + + +.. c:macro:: Py_HASH_FNV + Py_HASH_SIPHASH24 + Py_HASH_SIPHASH13 + + Numerical values to compare to :c:macro:`Py_HASH_ALGORITHM` to determine + which algorithm is used for hashing. The hash algorithm can be configured + via the configure :option:`--with-hash-algorithm` option. + + .. versionadded:: 3.4 + Add :c:macro:`!Py_HASH_FNV` and :c:macro:`!Py_HASH_SIPHASH24`. + + .. versionadded:: 3.11 + Add :c:macro:`!Py_HASH_SIPHASH13`. + + +.. c:macro:: Py_HASH_CUTOFF + + Buffers of length in range ``[1, Py_HASH_CUTOFF)`` are hashed using DJBX33A + instead of the algorithm described by :c:macro:`Py_HASH_ALGORITHM`. + + - A :c:macro:`!Py_HASH_CUTOFF` of 0 disables the optimization. + - :c:macro:`!Py_HASH_CUTOFF` must be non-negative and less or equal than 7. + + 32-bit platforms should use a cutoff smaller than 64-bit platforms because + it is easier to create colliding strings. A cutoff of 7 on 64-bit platforms + and 5 on 32-bit platforms should provide a decent safety margin. + + This corresponds to the :data:`sys.hash_info.cutoff` constant. + + .. versionadded:: 3.4 + + .. c:macro:: PyHASH_MODULUS - The `Mersenne prime `_ ``P = 2**n -1``, used for numeric hash scheme. + The `Mersenne prime `_ ``P = 2**n -1``, + used for numeric hash scheme. + + This corresponds to the :data:`sys.hash_info.modulus` constant. .. versionadded:: 3.13 + .. c:macro:: PyHASH_BITS The exponent ``n`` of ``P`` in :c:macro:`PyHASH_MODULUS`. .. versionadded:: 3.13 + .. c:macro:: PyHASH_MULTIPLIER Prime multiplier used in string and various other hashes. .. versionadded:: 3.13 + .. c:macro:: PyHASH_INF The hash value returned for a positive infinity. + This corresponds to the :data:`sys.hash_info.inf` constant. + .. versionadded:: 3.13 + .. c:macro:: PyHASH_IMAG The multiplier used for the imaginary part of a complex number. + This corresponds to the :data:`sys.hash_info.imag` constant. + .. versionadded:: 3.13 + .. c:type:: PyHash_FuncDef Hash function definition used by :c:func:`PyHash_GetFuncDef`. @@ -59,14 +115,20 @@ See also the :c:member:`PyTypeObject.tp_hash` member and :ref:`numeric-hash`. Hash function name (UTF-8 encoded string). + This corresponds to the :data:`sys.hash_info.algorithm` constant. + .. c:member:: const int hash_bits Internal size of the hash value in bits. + This corresponds to the :data:`sys.hash_info.hash_bits` constant. + .. c:member:: const int seed_bits Size of seed input in bits. + This corresponds to the :data:`sys.hash_info.seed_bits` constant. + .. versionadded:: 3.4 diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 440e4239cca..f60cadab534 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -314,6 +314,13 @@ Importing Modules initialization. +.. c:var:: struct _inittab *PyImport_Inittab + + The table of built-in modules used by Python initialization. Do not use this directly; + use :c:func:`PyImport_AppendInittab` and :c:func:`PyImport_ExtendInittab` + instead. + + .. c:function:: PyObject* PyImport_ImportModuleAttr(PyObject *mod_name, PyObject *attr_name) Import the module *mod_name* and get its attribute *attr_name*. @@ -370,3 +377,24 @@ Importing Modules - ``PyImport_LAZY_NONE`` .. versionadded:: 3.12 + +.. c:function:: PyObject* PyImport_CreateModuleFromInitfunc(PyObject *spec, PyObject* (*initfunc)(void)) + + This function is a building block that enables embedders to implement + the :py:meth:`~importlib.abc.Loader.create_module` step of custom + static extension importers (e.g. of statically-linked extensions). + + *spec* must be a :class:`~importlib.machinery.ModuleSpec` object. + + *initfunc* must be an :ref:`initialization function `, + the same as for :c:func:`PyImport_AppendInittab`. + + On success, create and return a module object. + This module will not be initialized; call :c:func:`PyModule_Exec` + to initialize it. + (Custom importers should do this in their + :py:meth:`~importlib.abc.Loader.exec_module` method.) + + On error, return NULL with an exception set. + + .. versionadded:: 3.15 diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 49ffeab5585..7411644f9e1 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -1366,6 +1366,43 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. versionadded:: 3.11 +.. c:function:: int PyUnstable_ThreadState_SetStackProtection(PyThreadState *tstate, void *stack_start_addr, size_t stack_size) + + Set the stack protection start address and stack protection size + of a Python thread state. + + On success, return ``0``. + On failure, set an exception and return ``-1``. + + CPython implements :ref:`recursion control ` for C code by raising + :py:exc:`RecursionError` when it notices that the machine execution stack is close + to overflow. See for example the :c:func:`Py_EnterRecursiveCall` function. + For this, it needs to know the location of the current thread's stack, which it + normally gets from the operating system. + When the stack is changed, for example using context switching techniques like the + Boost library's ``boost::context``, you must call + :c:func:`~PyUnstable_ThreadState_SetStackProtection` to inform CPython of the change. + + Call :c:func:`~PyUnstable_ThreadState_SetStackProtection` either before + or after changing the stack. + Do not call any other Python C API between the call and the stack + change. + + See :c:func:`PyUnstable_ThreadState_ResetStackProtection` for undoing this operation. + + .. versionadded:: 3.15 + + +.. c:function:: void PyUnstable_ThreadState_ResetStackProtection(PyThreadState *tstate) + + Reset the stack protection start address and stack protection size + of a Python thread state to the operating system defaults. + + See :c:func:`PyUnstable_ThreadState_SetStackProtection` for an explanation. + + .. versionadded:: 3.15 + + .. c:function:: PyInterpreterState* PyInterpreterState_Get(void) Get the current interpreter. @@ -1680,7 +1717,8 @@ function. You can create and destroy them using the following functions: Only C-level static and global variables are shared between these module objects. - * For modules using single-phase initialization, + * For modules using legacy + :ref:`single-phase initialization `, e.g. :c:func:`PyModule_Create`, the first time a particular extension is imported, it is initialized normally, and a (shallow) copy of its module's dictionary is squirreled away. @@ -1854,6 +1892,25 @@ pointer and a void pointer argument. This function now always schedules *func* to be run in the main interpreter. + +.. c:function:: int Py_MakePendingCalls(void) + + Execute all pending calls. This is usually executed automatically by the + interpreter. + + This function returns ``0`` on success, and returns ``-1`` with an exception + set on failure. + + If this is not called in the main thread of the main + interpreter, this function does nothing and returns ``0``. + The caller must hold an :term:`attached thread state`. + + .. versionadded:: 3.1 + + .. versionchanged:: 3.12 + This function only runs pending calls in the main interpreter. + + .. _profiling: Profiling and Tracing @@ -2483,3 +2540,220 @@ code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. In the default build, this macro expands to ``}``. .. versionadded:: 3.13 + + +Legacy Locking APIs +------------------- + +These APIs are obsolete since Python 3.13 with the introduction of +:c:type:`PyMutex`. + +.. versionchanged:: 3.15 + These APIs are now a simple wrapper around ``PyMutex``. + + +.. c:type:: PyThread_type_lock + + A pointer to a mutual exclusion lock. + + +.. c:type:: PyLockStatus + + The result of acquiring a lock with a timeout. + + .. c:namespace:: NULL + + .. c:enumerator:: PY_LOCK_FAILURE + + Failed to acquire the lock. + + .. c:enumerator:: PY_LOCK_ACQUIRED + + The lock was successfully acquired. + + .. c:enumerator:: PY_LOCK_INTR + + The lock was interrupted by a signal. + + +.. c:function:: PyThread_type_lock PyThread_allocate_lock(void) + + Allocate a new lock. + + On success, this function returns a lock; on failure, this + function returns ``0`` without an exception set. + + The caller does not need to hold an :term:`attached thread state`. + + .. versionchanged:: 3.15 + This function now always uses :c:type:`PyMutex`. In prior versions, this + would use a lock provided by the operating system. + + +.. c:function:: void PyThread_free_lock(PyThread_type_lock lock) + + Destroy *lock*. The lock should not be held by any thread when calling + this. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock lock, long long microseconds, int intr_flag) + + Acquire *lock* with a timeout. + + This will wait for *microseconds* microseconds to acquire the lock. If the + timeout expires, this function returns :c:enumerator:`PY_LOCK_FAILURE`. + If *microseconds* is ``-1``, this will wait indefinitely until the lock has + been released. + + If *intr_flag* is ``1``, acquiring the lock may be interrupted by a signal, + in which case this function returns :c:enumerator:`PY_LOCK_INTR`. Upon + interruption, it's generally expected that the caller makes a call to + :c:func:`Py_MakePendingCalls` to propagate an exception to Python code. + + If the lock is successfully acquired, this function returns + :c:enumerator:`PY_LOCK_ACQUIRED`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_acquire_lock(PyThread_type_lock lock, int waitflag) + + Acquire *lock*. + + If *waitflag* is ``1`` and another thread currently holds the lock, this + function will wait until the lock can be acquired and will always return + ``1``. + + If *waitflag* is ``0`` and another thread holds the lock, this function will + not wait and instead return ``0``. If the lock is not held by any other + thread, then this function will acquire it and return ``1``. + + Unlike :c:func:`PyThread_acquire_lock_timed`, acquiring the lock cannot be + interrupted by a signal. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: int PyThread_release_lock(PyThread_type_lock lock) + + Release *lock*. If *lock* is not held, then this function issues a + fatal error. + + The caller does not need to hold an :term:`attached thread state`. + + +Operating System Thread APIs +============================ + +.. c:macro:: PYTHREAD_INVALID_THREAD_ID + + Sentinel value for an invalid thread ID. + + This is currently equivalent to ``(unsigned long)-1``. + + +.. c:function:: unsigned long PyThread_start_new_thread(void (*func)(void *), void *arg) + + Start function *func* in a new thread with argument *arg*. + The resulting thread is not intended to be joined. + + *func* must not be ``NULL``, but *arg* may be ``NULL``. + + On success, this function returns the identifier of the new thread; on failure, + this returns :c:macro:`PYTHREAD_INVALID_THREAD_ID`. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: unsigned long PyThread_get_thread_ident(void) + + Return the identifier of the current thread, which will never be zero. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_ident` + + +.. c:function:: PyObject *PyThread_GetInfo(void) + + Get general information about the current thread in the form of a + :ref:`struct sequence ` object. This information is + accessible as :py:attr:`sys.thread_info` in Python. + + On success, this returns a new :term:`strong reference` to the thread + information; on failure, this returns ``NULL`` with an exception set. + + The caller must hold an :term:`attached thread state`. + + +.. c:macro:: PY_HAVE_THREAD_NATIVE_ID + + This macro is defined when the system supports native thread IDs. + + +.. c:function:: unsigned long PyThread_get_thread_native_id(void) + + Get the native identifier of the current thread as it was assigned by the operating + system's kernel, which will never be less than zero. + + This function is only available when :c:macro:`PY_HAVE_THREAD_NATIVE_ID` is + defined. + + This function cannot fail, and the caller does not need to hold an + :term:`attached thread state`. + + .. seealso:: + :py:func:`threading.get_native_id` + + +.. c:function:: void PyThread_exit_thread(void) + + Terminate the current thread. This function is generally considered unsafe + and should be avoided. It is kept solely for backwards compatibility. + + This function is only safe to call if all functions in the full call + stack are written to safely allow it. + + .. warning:: + + If the current system uses POSIX threads (also known as "pthreads"), + this calls :manpage:`pthread_exit(3)`, which attempts to unwind the stack + and call C++ destructors on some libc implementations. However, if a + ``noexcept`` function is reached, it may terminate the process. + Other systems, such as macOS, do unwinding. + + On Windows, this function calls ``_endthreadex()``, which kills the thread + without calling C++ destructors. + + In any case, there is a risk of corruption on the thread's stack. + + .. deprecated:: 3.14 + + +.. c:function:: void PyThread_init_thread(void) + + Initialize ``PyThread*`` APIs. Python executes this function automatically, + so there's little need to call it from an extension module. + + +.. c:function:: int PyThread_set_stacksize(size_t size) + + Set the stack size of the current thread to *size* bytes. + + This function returns ``0`` on success, ``-1`` if *size* is invalid, or + ``-2`` if the system does not support changing the stack size. This function + does not set exceptions. + + The caller does not need to hold an :term:`attached thread state`. + + +.. c:function:: size_t PyThread_get_stacksize(void) + + Return the stack size of the current thread in bytes, or ``0`` if the system's + default stack size is in use. + + The caller does not need to hold an :term:`attached thread state`. diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index b20495e672d..c345029e4ac 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -102,7 +102,7 @@ Error Handling * Set *\*err_msg* and return ``1`` if an error is set. * Set *\*err_msg* to ``NULL`` and return ``0`` otherwise. - An error message is an UTF-8 encoded string. + An error message is a UTF-8 encoded string. If *config* has an exit code, format the exit code as an error message. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index acce3dc215d..bb94bcb86a7 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -121,6 +121,10 @@ complete listing. Return the absolute value of ``x``. + If the result cannot be represented (for example, if ``x`` has + :c:macro:`!INT_MIN` value for :c:expr:`int` type), the behavior is + undefined. + .. versionadded:: 3.3 .. c:macro:: Py_ALWAYS_INLINE @@ -167,6 +171,17 @@ complete listing. Like ``getenv(s)``, but returns ``NULL`` if :option:`-E` was passed on the command line (see :c:member:`PyConfig.use_environment`). +.. c:macro:: Py_LOCAL(type) + + Declare a function returning the specified *type* using a fast-calling + qualifier for functions that are local to the current file. + Semantically, this is equivalent to ``static type``. + +.. c:macro:: Py_LOCAL_INLINE(type) + + Equivalent to :c:macro:`Py_LOCAL` but additionally requests the function + be inlined. + .. c:macro:: Py_MAX(x, y) Return the maximum value between ``x`` and ``y``. @@ -179,6 +194,14 @@ complete listing. .. versionadded:: 3.6 +.. c:macro:: Py_MEMCPY(dest, src, n) + + This is a :term:`soft deprecated` alias to :c:func:`!memcpy`. + Use :c:func:`!memcpy` directly instead. + + .. deprecated:: 3.14 + The macro is :term:`soft deprecated`. + .. c:macro:: Py_MIN(x, y) Return the minimum value between ``x`` and ``y``. @@ -233,9 +256,32 @@ complete listing. .. versionadded:: 3.4 +.. c:macro:: Py_BUILD_ASSERT(cond) + + Asserts a compile-time condition *cond*, as a statement. + The build will fail if the condition is false or cannot be evaluated at compile time. + + For example:: + + Py_BUILD_ASSERT(sizeof(PyTime_t) == sizeof(int64_t)); + + .. versionadded:: 3.3 + +.. c:macro:: Py_BUILD_ASSERT_EXPR(cond) + + Asserts a compile-time condition *cond*, as an expression that evaluates to ``0``. + The build will fail if the condition is false or cannot be evaluated at compile time. + + For example:: + + #define foo_to_char(foo) \ + ((char *)(foo) + Py_BUILD_ASSERT_EXPR(offsetof(struct foo, string) == 0)) + + .. versionadded:: 3.3 + .. c:macro:: PyDoc_STRVAR(name, str) - Creates a variable with name ``name`` that can be used in docstrings. + Creates a variable with name *name* that can be used in docstrings. If Python is built without docstrings, the value will be empty. Use :c:macro:`PyDoc_STRVAR` for docstrings to support building @@ -267,6 +313,28 @@ complete listing. {NULL, NULL} }; +.. c:macro:: PyDoc_VAR(name) + + Declares a static character array variable with the given name *name*. + + For example:: + + PyDoc_VAR(python_doc) = PyDoc_STR("A genus of constricting snakes in the Pythonidae family native " + "to the tropics and subtropics of the Eastern Hemisphere."); + +.. c:macro:: Py_ARRAY_LENGTH(array) + + Compute the length of a statically allocated C array at compile time. + + The *array* argument must be a C array with a size known at compile time. + Passing an array with an unknown size, such as a heap-allocated array, + will result in a compilation error on some compilers, or otherwise produce + incorrect results. + + This is roughly equivalent to:: + + sizeof(array) / sizeof((array)[0]) + .. _api-objects: diff --git a/Doc/c-api/iterator.rst b/Doc/c-api/iterator.rst index 6b7ba8c9979..bfbfe3c9279 100644 --- a/Doc/c-api/iterator.rst +++ b/Doc/c-api/iterator.rst @@ -50,3 +50,72 @@ sentinel value is returned. callable object that can be called with no parameters; each call to it should return the next item in the iteration. When *callable* returns a value equal to *sentinel*, the iteration will be terminated. + + +Range Objects +^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PyRange_Type + + The type object for :class:`range` objects. + + +.. c:function:: int PyRange_Check(PyObject *o) + + Return true if the object *o* is an instance of a :class:`range` object. + This function always succeeds. + + +Builtin Iterator Types +^^^^^^^^^^^^^^^^^^^^^^ + +These are built-in iteration types that are included in Python's C API, but +provide no additional functions. They are here for completeness. + + +.. list-table:: + :widths: auto + :header-rows: 1 + + * * C type + * Python type + * * .. c:var:: PyTypeObject PyEnum_Type + * :py:class:`enumerate` + * * .. c:var:: PyTypeObject PyFilter_Type + * :py:class:`filter` + * * .. c:var:: PyTypeObject PyMap_Type + * :py:class:`map` + * * .. c:var:: PyTypeObject PyReversed_Type + * :py:class:`reversed` + * * .. c:var:: PyTypeObject PyZip_Type + * :py:class:`zip` + + +Other Iterator Objects +^^^^^^^^^^^^^^^^^^^^^^ + +.. c:var:: PyTypeObject PyByteArrayIter_Type +.. c:var:: PyTypeObject PyBytesIter_Type +.. c:var:: PyTypeObject PyListIter_Type +.. c:var:: PyTypeObject PyListRevIter_Type +.. c:var:: PyTypeObject PySetIter_Type +.. c:var:: PyTypeObject PyTupleIter_Type +.. c:var:: PyTypeObject PyRangeIter_Type +.. c:var:: PyTypeObject PyLongRangeIter_Type +.. c:var:: PyTypeObject PyDictIterKey_Type +.. c:var:: PyTypeObject PyDictRevIterKey_Type +.. c:var:: PyTypeObject PyDictIterValue_Type +.. c:var:: PyTypeObject PyDictRevIterValue_Type +.. c:var:: PyTypeObject PyDictIterItem_Type +.. c:var:: PyTypeObject PyDictRevIterItem_Type +.. c:var:: PyTypeObject PyODictIter_Type + + Type objects for iterators of various built-in objects. + + Do not create instances of these directly; prefer calling + :c:func:`PyObject_GetIter` instead. + + Note that there is no guarantee that a given built-in type uses a given iterator + type. For example, iterating over :class:`range` will use one of two iterator + types depending on the size of the range. Other types may start using a + similar scheme in the future, without warning. diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index fcb20f7c93c..ed34efe716d 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -161,6 +161,17 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. c:macro:: PyLong_FromPid(pid) + + Macro for creating a Python integer from a process identifier. + + This can be defined as an alias to :c:func:`PyLong_FromLong` or + :c:func:`PyLong_FromLongLong`, depending on the size of the system's + PID type. + + .. versionadded:: 3.2 + + .. c:function:: long PyLong_AsLong(PyObject *obj) .. index:: @@ -575,6 +586,17 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. c:macro:: PyLong_AsPid(pid) + + Macro for converting a Python integer into a process identifier. + + This can be defined as an alias to :c:func:`PyLong_AsLong`, + :c:func:`PyLong_FromLongLong`, or :c:func:`PyLong_AsInt`, depending on the + size of the system's PID type. + + .. versionadded:: 3.2 + + .. c:function:: int PyLong_GetSign(PyObject *obj, int *sign) Get the sign of the integer object *obj*. diff --git a/Doc/c-api/mapping.rst b/Doc/c-api/mapping.rst index 1f55c0aa955..2476ebb9b69 100644 --- a/Doc/c-api/mapping.rst +++ b/Doc/c-api/mapping.rst @@ -102,7 +102,7 @@ See also :c:func:`PyObject_GetItem`, :c:func:`PyObject_SetItem` and .. note:: - Exceptions which occur when this calls :meth:`~object.__getitem__` + Exceptions which occur when this calls the :meth:`~object.__getitem__` method are silently ignored. For proper error handling, use :c:func:`PyMapping_HasKeyWithError`, :c:func:`PyMapping_GetOptionalItem` or :c:func:`PyObject_GetItem()` instead. @@ -116,7 +116,7 @@ See also :c:func:`PyObject_GetItem`, :c:func:`PyObject_SetItem` and .. note:: - Exceptions that occur when this calls :meth:`~object.__getitem__` + Exceptions that occur when this calls the :meth:`~object.__getitem__` method or while creating the temporary :class:`str` object are silently ignored. For proper error handling, use :c:func:`PyMapping_HasKeyStringWithError`, diff --git a/Doc/c-api/marshal.rst b/Doc/c-api/marshal.rst index 61218a1bf6f..668a163b2df 100644 --- a/Doc/c-api/marshal.rst +++ b/Doc/c-api/marshal.rst @@ -82,7 +82,7 @@ The following functions allow marshalled values to be read back in. assumes that no further objects will be read from the file, allowing it to aggressively load file data into memory so that the de-serialization can operate from data in memory rather than reading a byte at a time from the - file. Only use these variant if you are certain that you won't be reading + file. Only use this variant if you are certain that you won't be reading anything else from the file. On error, sets the appropriate exception (:exc:`EOFError`, :exc:`ValueError` diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index df1bb0ce370..23958980102 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -102,7 +102,7 @@ All allocating functions belong to one of three different "domains" (see also strategies and are optimized for different purposes. The specific details on how every domain allocates memory or what internal functions each domain calls is considered an implementation detail, but for debugging purposes a simplified -table can be found at :ref:`here `. +table can be found at :ref:`default-memory-allocators`. The APIs used to allocate and free a block of memory must be from the same domain. For example, :c:func:`PyMem_Free` must be used to free memory allocated using :c:func:`PyMem_Malloc`. diff --git a/Doc/c-api/memoryview.rst b/Doc/c-api/memoryview.rst index f6038032805..e4ac8b57673 100644 --- a/Doc/c-api/memoryview.rst +++ b/Doc/c-api/memoryview.rst @@ -13,6 +13,12 @@ A :class:`memoryview` object exposes the C level :ref:`buffer interface any other object. +.. c:var:: PyTypeObject PyMemoryView_Type + + This instance of :c:type:`PyTypeObject` represents the Python memoryview + type. This is the same object as :class:`memoryview` in the Python layer. + + .. c:function:: PyObject *PyMemoryView_FromObject(PyObject *obj) Create a memoryview object from an object that provides the buffer interface. diff --git a/Doc/c-api/module.rst b/Doc/c-api/module.rst index 6626f628fcc..a12f6331c85 100644 --- a/Doc/c-api/module.rst +++ b/Doc/c-api/module.rst @@ -3,17 +3,16 @@ .. _moduleobjects: Module Objects --------------- +============== .. index:: pair: object; module - .. c:var:: PyTypeObject PyModule_Type .. index:: single: ModuleType (in module types) This instance of :c:type:`PyTypeObject` represents the Python module type. This - is exposed to Python programs as ``types.ModuleType``. + is exposed to Python programs as :py:class:`types.ModuleType`. .. c:function:: int PyModule_Check(PyObject *p) @@ -71,6 +70,9 @@ Module Objects ``PyObject_*`` functions rather than directly manipulate a module's :attr:`~object.__dict__`. + The returned reference is borrowed from the module; it is valid until + the module is destroyed. + .. c:function:: PyObject* PyModule_GetNameObject(PyObject *module) @@ -90,12 +92,9 @@ Module Objects Similar to :c:func:`PyModule_GetNameObject` but return the name encoded to ``'utf-8'``. -.. c:function:: void* PyModule_GetState(PyObject *module) - - Return the "state" of the module, that is, a pointer to the block of memory - allocated at module creation time, or ``NULL``. See - :c:member:`PyModuleDef.m_size`. - + The returned buffer is only valid until the module is renamed or destroyed. + Note that Python code may rename a module by setting its :py:attr:`~module.__name__` + attribute. .. c:function:: PyModuleDef* PyModule_GetDef(PyObject *module) @@ -103,7 +102,7 @@ Module Objects created, or ``NULL`` if the module wasn't created from a definition. On error, return ``NULL`` with an exception set. - Use :c:func:`PyErr_Occurred` to tell this case apart from a mising + Use :c:func:`PyErr_Occurred` to tell this case apart from a missing :c:type:`!PyModuleDef`. @@ -126,215 +125,116 @@ Module Objects Similar to :c:func:`PyModule_GetFilenameObject` but return the filename encoded to 'utf-8'. + The returned buffer is only valid until the module's :py:attr:`~module.__file__` attribute + is reassigned or the module is destroyed. + .. deprecated:: 3.2 :c:func:`PyModule_GetFilename` raises :exc:`UnicodeEncodeError` on unencodable filenames, use :c:func:`PyModule_GetFilenameObject` instead. -.. _pymoduledef: +.. _pymoduledef_slot: -Module definitions ------------------- +Module definition +----------------- -The functions in the previous section work on any module object, including -modules imported from Python code. +Modules created using the C API are typically defined using an +array of :dfn:`slots`. +The slots provide a "description" of how a module should be created. -Modules defined using the C API typically use a *module definition*, -:c:type:`PyModuleDef` -- a statically allocated, constant “description" of -how a module should be created. +.. versionchanged:: next -The definition is usually used to define an extension's “main” module object -(see :ref:`extension-modules` for details). -It is also used to -:ref:`create extension modules dynamically `. + Previously, a :c:type:`PyModuleDef` struct was necessary to define modules. + The older way of defining modules is still available: consult either the + :ref:`pymoduledef` section or earlier versions of this documentation + if you plan to support earlier Python versions. -Unlike :c:func:`PyModule_New`, the definition allows management of -*module state* -- a piece of memory that is allocated and cleared together -with the module object. -Unlike the module's Python attributes, Python code cannot replace or delete -data stored in module state. +The slots array is usually used to define an extension module's “main” +module object (see :ref:`extension-modules` for details). +It can also be used to +:ref:`create extension modules dynamically `. -.. c:type:: PyModuleDef +Unless specified otherwise, the same slot ID may not be repeated +in an array of slots. - The module definition struct, which holds all information needed to create - a module object. - This structure must be statically allocated (or be otherwise guaranteed - to be valid while any modules created from it exist). - Usually, there is only one variable of this type for each extension module. - - .. c:member:: PyModuleDef_Base m_base - - Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`. - - .. c:member:: const char *m_name - - Name for the new module. - - .. c:member:: const char *m_doc - - Docstring for the module; usually a docstring variable created with - :c:macro:`PyDoc_STRVAR` is used. - - .. c:member:: Py_ssize_t m_size - - Module state may be kept in a per-module memory area that can be - retrieved with :c:func:`PyModule_GetState`, rather than in static globals. - This makes modules safe for use in multiple sub-interpreters. - - This memory area is allocated based on *m_size* on module creation, - and freed when the module object is deallocated, after the - :c:member:`~PyModuleDef.m_free` function has been called, if present. - - Setting it to a non-negative value means that the module can be - re-initialized and specifies the additional amount of memory it requires - for its state. - - Setting ``m_size`` to ``-1`` means that the module does not support - sub-interpreters, because it has global state. - Negative ``m_size`` is only allowed when using - :ref:`legacy single-phase initialization ` - or when :ref:`creating modules dynamically `. - - See :PEP:`3121` for more details. - - .. c:member:: PyMethodDef* m_methods - - A pointer to a table of module-level functions, described by - :c:type:`PyMethodDef` values. Can be ``NULL`` if no functions are present. - - .. c:member:: PyModuleDef_Slot* m_slots - - An array of slot definitions for multi-phase initialization, terminated by - a ``{0, NULL}`` entry. - When using legacy single-phase initialization, *m_slots* must be ``NULL``. - - .. versionchanged:: 3.5 - - Prior to version 3.5, this member was always set to ``NULL``, - and was defined as: - - .. c:member:: inquiry m_reload - - .. c:member:: traverseproc m_traverse - - A traversal function to call during GC traversal of the module object, or - ``NULL`` if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. - - .. versionchanged:: 3.9 - No longer called before the module state is allocated. - - .. c:member:: inquiry m_clear - - A clear function to call during GC clearing of the module object, or - ``NULL`` if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. - - Like :c:member:`PyTypeObject.tp_clear`, this function is not *always* - called before a module is deallocated. For example, when reference - counting is enough to determine that an object is no longer used, - the cyclic garbage collector is not involved and - :c:member:`~PyModuleDef.m_free` is called directly. - - .. versionchanged:: 3.9 - No longer called before the module state is allocated. - - .. c:member:: freefunc m_free - - A function to call during deallocation of the module object, or ``NULL`` - if not needed. - - This function is not called if the module state was requested but is not - allocated yet. This is the case immediately after the module is created - and before the module is executed (:c:data:`Py_mod_exec` function). More - precisely, this function is not called if :c:member:`~PyModuleDef.m_size` is greater - than 0 and the module state (as returned by :c:func:`PyModule_GetState`) - is ``NULL``. - - .. versionchanged:: 3.9 - No longer called before the module state is allocated. - - -Module slots -............ .. c:type:: PyModuleDef_Slot .. c:member:: int slot - A slot ID, chosen from the available values explained below. + A slot ID, chosen from the available ``Py_mod_*`` values explained below. + + An ID of 0 marks the end of a :c:type:`!PyModuleDef_Slot` array. .. c:member:: void* value Value of the slot, whose meaning depends on the slot ID. + The value may not be NULL. + To leave a slot out, omit the :c:type:`PyModuleDef_Slot` entry entirely. + .. versionadded:: 3.5 -The available slot types are: -.. c:macro:: Py_mod_create +Metadata slots +.............. - Specifies a function that is called to create the module object itself. - The *value* pointer of this slot must point to a function of the signature: +.. c:macro:: Py_mod_name - .. c:function:: PyObject* create_module(PyObject *spec, PyModuleDef *def) - :no-index-entry: - :no-contents-entry: + :c:type:`Slot ID ` for the name of the new module, + as a NUL-terminated UTF8-encoded ``const char *``. - The function receives a :py:class:`~importlib.machinery.ModuleSpec` - instance, as defined in :PEP:`451`, and the module definition. - It should return a new module object, or set an error - and return ``NULL``. + Note that modules are typically created using a + :py:class:`~importlib.machinery.ModuleSpec`, and when they are, the + name from the spec will be used instead of :c:data:`!Py_mod_name`. + However, it is still recommended to include this slot for introspection + and debugging purposes. - This function should be kept minimal. In particular, it should not - call arbitrary Python code, as trying to import the same module again may - result in an infinite loop. + .. versionadded:: next - Multiple ``Py_mod_create`` slots may not be specified in one module - definition. + Use :c:member:`PyModuleDef.m_name` instead to support previous versions. - If ``Py_mod_create`` is not specified, the import machinery will create - a normal module object using :c:func:`PyModule_New`. The name is taken from - *spec*, not the definition, to allow extension modules to dynamically adjust - to their place in the module hierarchy and be imported under different - names through symlinks, all while sharing a single module definition. +.. c:macro:: Py_mod_doc - There is no requirement for the returned object to be an instance of - :c:type:`PyModule_Type`. Any type can be used, as long as it supports - setting and getting import-related attributes. - However, only ``PyModule_Type`` instances may be returned if the - ``PyModuleDef`` has non-``NULL`` ``m_traverse``, ``m_clear``, - ``m_free``; non-zero ``m_size``; or slots other than ``Py_mod_create``. + :c:type:`Slot ID ` for the docstring of the new + module, as a NUL-terminated UTF8-encoded ``const char *``. -.. c:macro:: Py_mod_exec + Usually it is set to a variable created with :c:macro:`PyDoc_STRVAR`. - Specifies a function that is called to *execute* the module. - This is equivalent to executing the code of a Python module: typically, - this function adds classes and constants to the module. - The signature of the function is: + .. versionadded:: next - .. c:function:: int exec_module(PyObject* module) - :no-index-entry: - :no-contents-entry: + Use :c:member:`PyModuleDef.m_doc` instead to support previous versions. - If multiple ``Py_mod_exec`` slots are specified, they are processed in the - order they appear in the *m_slots* array. + +Feature slots +............. + +.. c:macro:: Py_mod_abi + + :c:type:`Slot ID ` whose value points to + a :c:struct:`PyABIInfo` structure describing the ABI that + the extension is using. + + A suitable :c:struct:`!PyABIInfo` variable can be defined using the + :c:macro:`PyABIInfo_VAR` macro, as in: + + .. code-block:: c + + PyABIInfo_VAR(abi_info); + + static PyModuleDef_Slot mymodule_slots[] = { + {Py_mod_abi, &abi_info}, + ... + }; + + When creating a module, Python checks the value of this slot + using :c:func:`PyABIInfo_Check`. + + .. versionadded:: 3.15 .. c:macro:: Py_mod_multiple_interpreters - Specifies one of the following values: + :c:type:`Slot ID ` whose value is one of: .. c:namespace:: NULL @@ -357,9 +257,6 @@ The available slot types are: This slot determines whether or not importing this module in a subinterpreter will fail. - Multiple ``Py_mod_multiple_interpreters`` slots may not be specified - in one module definition. - If ``Py_mod_multiple_interpreters`` is not specified, the import machinery defaults to ``Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED``. @@ -367,7 +264,7 @@ The available slot types are: .. c:macro:: Py_mod_gil - Specifies one of the following values: + :c:type:`Slot ID ` whose value is one of: .. c:namespace:: NULL @@ -385,45 +282,482 @@ The available slot types are: this module will cause the GIL to be automatically enabled. See :ref:`whatsnew313-free-threaded-cpython` for more detail. - Multiple ``Py_mod_gil`` slots may not be specified in one module definition. - If ``Py_mod_gil`` is not specified, the import machinery defaults to ``Py_MOD_GIL_USED``. .. versionadded:: 3.13 -.. c:macro:: Py_mod_abi - A pointer to a :c:struct:`PyABIInfo` structure that describes the ABI that - the extension is using. +Creation and initialization slots +................................. - When the module is loaded, the :c:struct:`!PyABIInfo` in this slot is checked - using :c:func:`PyABIInfo_Check`. +.. c:macro:: Py_mod_create - A suitable :c:struct:`!PyABIInfo` variable can be defined using the - :c:macro:`PyABIInfo_VAR` macro, as in: + :c:type:`Slot ID ` for a function that creates + the module object itself. + The function must have the signature: - .. code-block:: c + .. c:function:: PyObject* create_module(PyObject *spec, PyModuleDef *def) + :no-index-entry: + :no-contents-entry: - PyABIInfo_VAR(abi_info); + The function will be called with: - static PyModuleDef_Slot mymodule_slots[] = { - {Py_mod_abi, &abi_info}, - ... - }; + - *spec*: a ``ModuleSpec``-like object, meaning that any attributes defined + for :py:class:`importlib.machinery.ModuleSpec` have matching semantics. + However, any of the attributes may be missing. + - *def*: ``NULL``, or the module definition if the module is created from one. - .. versionadded:: 3.15 + The function should return a new module object, or set an error + and return ``NULL``. + + This function should be kept minimal. In particular, it should not + call arbitrary Python code, as trying to import the same module again may + result in an infinite loop. + + If ``Py_mod_create`` is not specified, the import machinery will create + a normal module object using :c:func:`PyModule_New`. The name is taken from + *spec*, not the definition, to allow extension modules to dynamically adjust + to their place in the module hierarchy and be imported under different + names through symlinks, all while sharing a single module definition. + + There is no requirement for the returned object to be an instance of + :c:type:`PyModule_Type`. + However, some slots may only be used with + :c:type:`!PyModule_Type` instances; in particular: + + - :c:macro:`Py_mod_exec`, + - :ref:`module state slots ` (``Py_mod_state_*``), + - :c:macro:`Py_mod_token`. + + .. versionadded:: 3.5 + + .. versionchanged:: next + + The *slots* argument may be a ``ModuleSpec``-like object, rather than + a true :py:class:`~importlib.machinery.ModuleSpec` instance. + Note that previous versions of CPython did not enforce this. + + The *def* argument may now be ``NULL``, since modules are not necessarily + made from definitions. + +.. c:macro:: Py_mod_exec + + :c:type:`Slot ID ` for a function that will + :dfn:`execute`, or initialize, the module. + This function does the equivalent to executing the code of a Python module: + typically, it adds classes and constants to the module. + The signature of the function is: + + .. c:function:: int exec_module(PyObject* module) + :no-index-entry: + :no-contents-entry: + + See the :ref:`capi-module-support-functions` section for some useful + functions to call. + + For backwards compatibility, the :c:type:`PyModuleDef.m_slots` array may + contain multiple :c:macro:`!Py_mod_exec` slots; these are processed in the + order they appear in the array. + Elsewhere (that is, in arguments to :c:func:`PyModule_FromSlotsAndSpec` + and in return values of :samp:`PyModExport_{}`), repeating the slot + is not allowed. + + .. versionadded:: 3.5 + + .. versionchanged:: next + + Repeated ``Py_mod_exec`` slots are disallowed, except in + :c:type:`PyModuleDef.m_slots`. + +.. c:macro:: Py_mod_methods + + :c:type:`Slot ID ` for a table of module-level + functions, as an array of :c:type:`PyMethodDef` values suitable as the + *functions* argument to :c:func:`PyModule_AddFunctions`. + + Like other slot IDs, a slots array may only contain one + :c:macro:`!Py_mod_methods` entry. + To add functions from multiple :c:type:`PyMethodDef` arrays, call + :c:func:`PyModule_AddFunctions` in the :c:macro:`Py_mod_exec` function. + + The table must be statically allocated (or otherwise guaranteed to outlive + the module object). + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_methods` instead to support previous versions. + +.. _ext-module-state: + +Module state +------------ + +Extension modules can have *module state* -- a +piece of memory that is allocated on module creation, +and freed when the module object is deallocated. +The module state is specified using :ref:`dedicated slots `. + +A typical use of module state is storing an exception type -- or indeed *any* +type object defined by the module -- + +Unlike the module's Python attributes, Python code cannot replace or delete +data stored in module state. + +Keeping per-module information in attributes and module state, rather than in +static globals, makes module objects *isolated* and safer for use in +multiple sub-interpreters. +It also helps Python do an orderly clean-up when it shuts down. + +Extensions that keep references to Python objects as part of module state must +implement :c:macro:`Py_mod_state_traverse` and :c:macro:`Py_mod_state_clear` +functions to avoid reference leaks. + +To retrieve the state from a given module, use the following functions: + +.. c:function:: void* PyModule_GetState(PyObject *module) + + Return the "state" of the module, that is, a pointer to the block of memory + allocated at module creation time, or ``NULL``. See + :c:macro:`Py_mod_state_size`. + + On error, return ``NULL`` with an exception set. + Use :c:func:`PyErr_Occurred` to tell this case apart from missing + module state. -.. _moduledef-dynamic: +.. c:function:: int PyModule_GetStateSize(PyObject *, Py_ssize_t *result) + + Set *\*result* to the size of the module's state, as specified using + :c:macro:`Py_mod_state_size` (or :c:member:`PyModuleDef.m_size`), + and return 0. + + On error, set *\*result* to -1, and return -1 with an exception set. + + .. versionadded:: next + + + +.. _ext-module-state-slots: + +Slots for defining module state +............................... + +The following :c:member:`PyModuleDef_Slot.slot` IDs are available for +defining the module state. + +.. c:macro:: Py_mod_state_size + + :c:type:`Slot ID ` for the size of the module state, + in bytes. + + Setting the value to a non-negative value means that the module can be + re-initialized and specifies the additional amount of memory it requires + for its state. + + See :PEP:`3121` for more details. + + Use :c:func:`PyModule_GetStateSize` to retrieve the size of a given module. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_size` instead to support previous versions. + +.. c:macro:: Py_mod_state_traverse + + :c:type:`Slot ID ` for a traversal function to call + during GC traversal of the module object. + + The signature of the function, and meanings of the arguments, + is similar as for :c:member:`PyTypeObject.tp_traverse`: + + .. c:function:: int traverse_module_state(PyObject *module, visitproc visit, void *arg) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_size` instead to support previous versions. + +.. c:macro:: Py_mod_state_clear + + :c:type:`Slot ID ` for a clear function to call + during GC clearing of the module object. + + The signature of the function is: + + .. c:function:: int clear_module_state(PyObject* module) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + Like :c:member:`PyTypeObject.tp_clear`, this function is not *always* + called before a module is deallocated. For example, when reference + counting is enough to determine that an object is no longer used, + the cyclic garbage collector is not involved and + the :c:macro:`Py_mod_state_free` function is called directly. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_clear` instead to support previous versions. + +.. c:macro:: Py_mod_state_free + + :c:type:`Slot ID ` for a function to call during + deallocation of the module object. + + The signature of the function is: + + .. c:function:: int free_module_state(PyObject* module) + :no-index-entry: + :no-contents-entry: + + This function is not called if the module state was requested but is not + allocated yet. This is the case immediately after the module is created + and before the module is executed (:c:data:`Py_mod_exec` function). More + precisely, this function is not called if the state size + (:c:data:`Py_mod_state_size`) is greater than 0 and the module state + (as returned by :c:func:`PyModule_GetState`) is ``NULL``. + + .. versionadded:: next + + Use :c:member:`PyModuleDef.m_free` instead to support previous versions. + + +.. _ext-module-token: + +Module token +............ + +Each module may have an associated *token*: a pointer-sized value intended to +identify of the module state's memory layout. +This means that if you have a module object, but you are not sure if it +“belongs” to your extension, you can check using code like this: + +.. code-block:: c + + PyObject *module = + + void *module_token; + if (PyModule_GetToken(module, &module_token) < 0) { + return NULL; + } + if (module_token != your_token) { + PyErr_SetString(PyExc_ValueError, "unexpected module") + return NULL; + } + + // This module's state has the expected memory layout; it's safe to cast + struct my_state state = (struct my_state*)PyModule_GetState(module) + +A module's token -- and the *your_token* value to use in the above code -- is: + +- For modules created with :c:type:`PyModuleDef`: the address of that + :c:type:`PyModuleDef`; +- For modules defined with the :c:macro:`Py_mod_token` slot: the value + of that slot; +- For modules created from an ``PyModExport_*`` + :ref:`export hook `: the slots array that the export + hook returned (unless overriden with :c:macro:`Py_mod_token`). + +.. c:macro:: Py_mod_token + + :c:type:`Slot ID ` for the module token. + + If you use this slot to set the module token (rather than rely on the + default), you must ensure that: + + * The pointer outlives the class, so it's not reused for something else + while the class exists. + * It "belongs" to the extension module where the class lives, so it will not + clash with other extensions. + * If the token points to a :c:type:`PyModuleDef` struct, the module should + behave as if it was created from that :c:type:`PyModuleDef`. + In particular, the module state must have matching layout and semantics. + + Modules created from :c:type:`PyModuleDef` allways use the address of + the :c:type:`PyModuleDef` as the token. + This means that :c:macro:`!Py_mod_token` cannot be used in + :c:member:`PyModuleDef.m_slots`. + + .. versionadded:: next + +.. c:function:: int PyModule_GetToken(PyObject *module, void** result) + + Set *\*result* to the module's token and return 0. + + On error, set *\*result* to NULL, and return -1 with an exception set. + + .. versionadded:: next + +See also :c:func:`PyType_GetModuleByToken`. + + +.. _module-from-slots: Creating extension modules dynamically -------------------------------------- -The following functions may be used to create a module outside of an -extension's :ref:`initialization function `. -They are also used in -:ref:`single-phase initialization `. +The following functions may be used to create an extension module dynamically, +rather than from an extension's :ref:`export hook `. + +.. c:function:: PyObject *PyModule_FromSlotsAndSpec(const PyModuleDef_Slot *slots, PyObject *spec) + + Create a new module object, given an array of :ref:`slots ` + and the :py:class:`~importlib.machinery.ModuleSpec` *spec*. + + The *slots* argument must point to an array of :c:type:`PyModuleDef_Slot` + structures, terminated by an entry slot with slot ID of 0 + (typically written as ``{0}`` or ``{0, NULL}`` in C). + The *slots* argument may not be ``NULL``. + + The *spec* argument may be any ``ModuleSpec``-like object, as described + in :c:macro:`Py_mod_create` documentation. + Currently, the *spec* must have a ``name`` attribute. + + On success, return the new module. + On error, return ``NULL`` with an exception set. + + Note that this does not process the module's execution slot + (:c:data:`Py_mod_exec`). + Both :c:func:`!PyModule_FromSlotsAndSpec` and :c:func:`PyModule_Exec` + must be called to fully initialize a module. + (See also :ref:`multi-phase-initialization`.) + + The *slots* array only needs to be valid for the duration of the + :c:func:`!PyModule_FromSlotsAndSpec` call. + In particular, it may be heap-allocated. + + .. versionadded:: next + +.. c:function:: int PyModule_Exec(PyObject *module) + + Execute the :c:data:`Py_mod_exec` slot(s) of the given *module*. + + On success, return 0. + On error, return -1 with an exception set. + + For clarity: If *module* has no slots, for example if it uses + :ref:`legacy single-phase initialization `, + this function does nothing and returns 0. + + .. versionadded:: next + + + +.. _pymoduledef: + +Module definition struct +------------------------ + +Traditionally, extension modules were defined using a *module definition* +as the “description" of how a module should be created. +Rather than using an array of :ref:`slots ` directly, +the definition has dedicated members for most common functionality, +and allows additional slots as an extension mechanism. + +This way of defining modules is still available and there are no plans to +remove it. + +.. c:type:: PyModuleDef + + The module definition struct, which holds information needed to create + a module object. + + This structure must be statically allocated (or be otherwise guaranteed + to be valid while any modules created from it exist). + Usually, there is only one variable of this type for each extension module + defined this way. + + .. c:member:: PyModuleDef_Base m_base + + Always initialize this member to :c:macro:`PyModuleDef_HEAD_INIT`: + + .. c:namespace:: NULL + + .. c:type:: PyModuleDef_Base + + The type of :c:member:`!PyModuleDef.m_base`. + + .. c:macro:: PyModuleDef_HEAD_INIT + + The required initial value for :c:member:`!PyModuleDef.m_base`. + + .. c:member:: const char *m_name + + Corresponds to the :c:macro:`Py_mod_name` slot. + + .. c:member:: const char *m_doc + + These members correspond to the :c:macro:`Py_mod_doc` slot. + Setting this to NULL is equivalent to omitting the slot. + + .. c:member:: Py_ssize_t m_size + + Corresponds to the :c:macro:`Py_mod_state_size` slot. + Setting this to zero is equivalent to omitting the slot. + + When using :ref:`legacy single-phase initialization ` + or when creating modules dynamically using :c:func:`PyModule_Create` + or :c:func:`PyModule_Create2`, :c:member:`!m_size` may be set to -1. + This indicates that the module does not support sub-interpreters, + because it has global state. + + .. c:member:: PyMethodDef *m_methods + + Corresponds to the :c:macro:`Py_mod_methods` slot. + Setting this to NULL is equivalent to omitting the slot. + + .. c:member:: PyModuleDef_Slot* m_slots + + An array of additional slots, terminated by a ``{0, NULL}`` entry. + + This array may not contain slots corresponding to :c:type:`PyModuleDef` + members. + For example, you cannot use :c:macro:`Py_mod_name` in :c:member:`!m_slots`; + the module name must be given as :c:member:`PyModuleDef.m_name`. + + .. versionchanged:: 3.5 + + Prior to version 3.5, this member was always set to ``NULL``, + and was defined as: + + .. c:member:: inquiry m_reload + + .. c:member:: traverseproc m_traverse + inquiry m_clear + freefunc m_free + + These members correspond to the :c:macro:`Py_mod_state_traverse`, + :c:macro:`Py_mod_state_clear`, and :c:macro:`Py_mod_state_free` slots, + respectively. + + Setting these members to NULL is equivalent to omitting the + corresponding slots. + + .. versionchanged:: 3.9 + + :c:member:`m_traverse`, :c:member:`m_clear` and :c:member:`m_free` + functions are longer called before the module state is allocated. + + +.. _moduledef-dynamic: + +The following API can be used to create modules from a :c:type:`!PyModuleDef` +struct: .. c:function:: PyObject* PyModule_Create(PyModuleDef *def) @@ -500,12 +834,13 @@ They are also used in useful for versioning. This may change in the future. +.. _capi-module-support-functions: + Support functions ----------------- -The following functions are provided to help initialize a module -state. -They are intended for a module's execution slots (:c:data:`Py_mod_exec`), +The following functions are provided to help initialize a module object. +They are intended for a module's execution slot (:c:data:`Py_mod_exec`), the initialization function for legacy :ref:`single-phase initialization `, or code that creates modules dynamically. @@ -671,6 +1006,9 @@ or code that creates modules dynamically. :c:type:`PyMethodDef` arrays; in that case they should call this function directly. + The *functions* array must be statically allocated (or otherwise guaranteed + to outlive the module object). + .. versionadded:: 3.5 .. c:function:: int PyModule_SetDocString(PyObject *module, const char *docstring) diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index 7926148302a..b0227c2f4fa 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -136,7 +136,7 @@ Managing the Monitoring State ----------------------------- Monitoring states can be managed with the help of monitoring scopes. A scope -would typically correspond to a python function. +would typically correspond to a Python function. .. c:function:: int PyMonitoring_EnterScope(PyMonitoringState *state_array, uint64_t *version, const uint8_t *event_types, Py_ssize_t length) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 8629b768a29..76971c46c16 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -73,7 +73,7 @@ Object Protocol Flag to be used with multiple functions that print the object (like :c:func:`PyObject_Print` and :c:func:`PyFile_WriteObject`). - If passed, these function would use the :func:`str` of the object + If passed, these functions use the :func:`str` of the object instead of the :func:`repr`. @@ -85,6 +85,35 @@ Object Protocol instead of the :func:`repr`. +.. c:function:: void PyUnstable_Object_Dump(PyObject *op) + + Dump an object *op* to ``stderr``. This should only be used for debugging. + + The output is intended to try dumping objects even after memory corruption: + + * Information is written starting with fields that are the least likely to + crash when accessed. + * This function can be called without an :term:`attached thread state`, but + it's not recommended to do so: it can cause deadlocks. + * An object that does not belong to the current interpreter may be dumped, + but this may also cause crashes or unintended behavior. + * Implement a heuristic to detect if the object memory has been freed. Don't + display the object contents in this case, only its memory address. + * The output format may change at any time. + + Example of output: + + .. code-block:: output + + object address : 0x7f80124702c0 + object refcount : 2 + object type : 0x9902e0 + object type name: str + object repr : 'abcdef' + + .. versionadded:: next + + .. c:function:: int PyObject_HasAttrWithError(PyObject *o, PyObject *attr_name) Returns ``1`` if *o* has the attribute *attr_name*, and ``0`` otherwise. diff --git a/Doc/c-api/picklebuffer.rst b/Doc/c-api/picklebuffer.rst new file mode 100644 index 00000000000..9e2d92341b0 --- /dev/null +++ b/Doc/c-api/picklebuffer.rst @@ -0,0 +1,59 @@ +.. highlight:: c + +.. _picklebuffer-objects: + +.. index:: + pair: object; PickleBuffer + +Pickle buffer objects +--------------------- + +.. versionadded:: 3.8 + +A :class:`pickle.PickleBuffer` object wraps a :ref:`buffer-providing object +` for out-of-band data transfer with the :mod:`pickle` module. + + +.. c:var:: PyTypeObject PyPickleBuffer_Type + + This instance of :c:type:`PyTypeObject` represents the Python pickle buffer type. + This is the same object as :class:`pickle.PickleBuffer` in the Python layer. + + +.. c:function:: int PyPickleBuffer_Check(PyObject *op) + + Return true if *op* is a pickle buffer instance. + This function always succeeds. + + +.. c:function:: PyObject *PyPickleBuffer_FromObject(PyObject *obj) + + Create a pickle buffer from the object *obj*. + + This function will fail if *obj* doesn't support the :ref:`buffer protocol `. + + On success, return a new pickle buffer instance. + On failure, set an exception and return ``NULL``. + + Analogous to calling :class:`pickle.PickleBuffer` with *obj* in Python. + + +.. c:function:: const Py_buffer *PyPickleBuffer_GetBuffer(PyObject *picklebuf) + + Get a pointer to the underlying :c:type:`Py_buffer` that the pickle buffer wraps. + + The returned pointer is valid as long as *picklebuf* is alive and has not been + released. The caller must not modify or free the returned :c:type:`Py_buffer`. + If the pickle buffer has been released, raise :exc:`ValueError`. + + On success, return a pointer to the buffer view. + On failure, set an exception and return ``NULL``. + + +.. c:function:: int PyPickleBuffer_Release(PyObject *picklebuf) + + Release the underlying buffer held by the pickle buffer. + + Return ``0`` on success. On failure, set an exception and return ``-1``. + + Analogous to calling :meth:`pickle.PickleBuffer.release` in Python. diff --git a/Doc/c-api/set.rst b/Doc/c-api/set.rst index cba823aa027..09c0fb6b9c5 100644 --- a/Doc/c-api/set.rst +++ b/Doc/c-api/set.rst @@ -147,7 +147,7 @@ subtypes but not for instances of :class:`frozenset` or its subtypes. Return ``1`` if found and removed, ``0`` if not found (no action taken), and ``-1`` if an error is encountered. Does not raise :exc:`KeyError` for missing keys. Raise a - :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~frozenset.discard` + :exc:`TypeError` if the *key* is unhashable. Unlike the Python :meth:`~set.discard` method, this function does not automatically convert unhashable sets into temporary frozensets. Raise :exc:`SystemError` if *set* is not an instance of :class:`set` or its subtype. diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst index b08a7bf1b2f..f5e6b7ad157 100644 --- a/Doc/c-api/stable.rst +++ b/Doc/c-api/stable.rst @@ -279,7 +279,7 @@ The full API is described below for advanced use cases. .. c:member:: uint8_t abiinfo_minor_version - The major version of :c:struct:`PyABIInfo`. + The minor version of :c:struct:`PyABIInfo`. Must be set to ``0``; larger values are reserved for backwards-compatible future versions of :c:struct:`!PyABIInfo`. diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 58dd915e04f..62f45def04f 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -280,6 +280,8 @@ Implementing functions and methods Name of the method. + A ``NULL`` *ml_name* marks the end of a :c:type:`!PyMethodDef` array. + .. c:member:: PyCFunction ml_meth Pointer to the C implementation. @@ -447,6 +449,25 @@ definition with the same method name. slot. This is helpful because calls to PyCFunctions are optimized more than wrapper object calls. + +.. c:var:: PyTypeObject PyCMethod_Type + + The type object corresponding to Python C method objects. This is + available as :class:`types.BuiltinMethodType` in the Python layer. + + +.. c:function:: int PyCMethod_Check(PyObject *op) + + Return true if *op* is an instance of the :c:type:`PyCMethod_Type` type + or a subtype of it. This function always succeeds. + + +.. c:function:: int PyCMethod_CheckExact(PyObject *op) + + This is the same as :c:func:`PyCMethod_Check`, but does not account for + subtypes. + + .. c:function:: PyObject * PyCMethod_New(PyMethodDef *ml, PyObject *self, PyObject *module, PyTypeObject *cls) Turn *ml* into a Python :term:`callable` object. @@ -472,6 +493,24 @@ definition with the same method name. .. versionadded:: 3.9 +.. c:var:: PyTypeObject PyCFunction_Type + + The type object corresponding to Python C function objects. This is + available as :class:`types.BuiltinFunctionType` in the Python layer. + + +.. c:function:: int PyCFunction_Check(PyObject *op) + + Return true if *op* is an instance of the :c:type:`PyCFunction_Type` type + or a subtype of it. This function always succeeds. + + +.. c:function:: int PyCFunction_CheckExact(PyObject *op) + + This is the same as :c:func:`PyCFunction_Check`, but does not account for + subtypes. + + .. c:function:: PyObject * PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module) Equivalent to ``PyCMethod_New(ml, self, module, NULL)``. @@ -482,6 +521,62 @@ definition with the same method name. Equivalent to ``PyCMethod_New(ml, self, NULL, NULL)``. +.. c:function:: int PyCFunction_GetFlags(PyObject *func) + + Get the function's flags on *func* as they were passed to + :c:member:`~PyMethodDef.ml_flags`. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns the function's flags on success, and ``-1`` with an + exception set on failure. + + +.. c:function:: int PyCFunction_GET_FLAGS(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetFlags`, but without error + or type checking. + + +.. c:function:: PyCFunction PyCFunction_GetFunction(PyObject *func) + + Get the function pointer on *func* as it was passed to + :c:member:`~PyMethodDef.ml_meth`. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns the function pointer on success, and ``NULL`` with an + exception set on failure. + + +.. c:function:: int PyCFunction_GET_FUNCTION(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetFunction`, but without error + or type checking. + + +.. c:function:: PyObject *PyCFunction_GetSelf(PyObject *func) + + Get the "self" object on *func*. This is the object that would be passed + to the first argument of a :c:type:`PyCFunction`. For C function objects + created through a :c:type:`PyMethodDef` on a :c:type:`PyModuleDef`, this + is the resulting module object. + + If *func* is not a C function object, this fails with an exception. + *func* must not be ``NULL``. + + This function returns a :term:`borrowed reference` to the "self" object + on success, and ``NULL`` with an exception set on failure. + + +.. c:function:: PyObject *PyCFunction_GET_SELF(PyObject *func) + + This is the same as :c:func:`PyCFunction_GetSelf`, but without error or + type checking. + + Accessing attributes of extension types --------------------------------------- @@ -605,14 +700,12 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: entry indicates an offset from the subclass-specific data, rather than from ``PyObject``. - Can only be used as part of :c:member:`Py_tp_members ` + Can only be used as part of the :c:data:`Py_tp_members` :c:type:`slot ` when creating a class using negative :c:member:`~PyType_Spec.basicsize`. It is mandatory in that case. - - This flag is only used in :c:type:`PyType_Slot`. - When setting :c:member:`~PyTypeObject.tp_members` during - class creation, Python clears it and sets + When setting :c:member:`~PyTypeObject.tp_members` from the slot during + class creation, Python clears the flag and sets :c:member:`PyMemberDef.offset` to the offset from the ``PyObject`` struct. .. index:: diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst index 336e3ef9640..ee73c1c8ada 100644 --- a/Doc/c-api/sys.rst +++ b/Doc/c-api/sys.rst @@ -123,6 +123,24 @@ Operating System Utilities This is a thin wrapper around either :c:func:`!sigaction` or :c:func:`!signal`. Do not call those functions directly! + +.. c:function:: int PyOS_InterruptOccurred(void) + + Check if a :c:macro:`!SIGINT` signal has been received. + + Returns ``1`` if a :c:macro:`!SIGINT` has occurred and clears the signal flag, + or ``0`` otherwise. + + In most cases, you should prefer :c:func:`PyErr_CheckSignals` over this function. + :c:func:`!PyErr_CheckSignals` invokes the appropriate signal handlers + for all pending signals, allowing Python code to handle the signal properly. + This function only detects :c:macro:`!SIGINT` and does not invoke any Python + signal handlers. + + This function is async-signal-safe and this function cannot fail. + The caller must hold an :term:`attached thread state`. + + .. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size) .. warning:: diff --git a/Doc/c-api/tuple.rst b/Doc/c-api/tuple.rst index 14a7c05efea..d0add48d7e8 100644 --- a/Doc/c-api/tuple.rst +++ b/Doc/c-api/tuple.rst @@ -61,7 +61,7 @@ Tuple Objects .. c:function:: Py_ssize_t PyTuple_Size(PyObject *p) Take a pointer to a tuple object, and return the size of that tuple. - On error, return ``-1`` and with an exception set. + On error, return ``-1`` with an exception set. .. c:function:: Py_ssize_t PyTuple_GET_SIZE(PyObject *p) diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index 5bdbff4e0ad..1f57cc04f5d 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -116,6 +116,20 @@ Type Objects .. versionadded:: 3.12 +.. c:function:: int PyType_Unwatch(int watcher_id, PyObject *type) + + Mark *type* as not watched. This undoes a previous call to + :c:func:`PyType_Watch`. *type* must not be ``NULL``. + + An extension should never call this function with a *watcher_id* that was + not returned to it by a previous call to :c:func:`PyType_AddWatcher`. + + On success, this function returns ``0``. On failure, this function returns + ``-1`` with an exception set. + + .. versionadded:: 3.12 + + .. c:type:: int (*PyType_WatchCallback)(PyObject *type) Type of a type-watcher callback function. @@ -133,6 +147,18 @@ Type Objects Type features are denoted by single bit flags. +.. c:function:: int PyType_FastSubclass(PyTypeObject *type, int flag) + + Return non-zero if the type object *type* sets the subclass flag *flag*. + Subclass flags are denoted by + :c:macro:`Py_TPFLAGS_*_SUBCLASS `. + This function is used by many ``_Check`` functions for common types. + + .. seealso:: + :c:func:`PyObject_TypeCheck`, which is used as a slower alternative in + ``_Check`` functions for types that don't come with subclass flags. + + .. c:function:: int PyType_IS_GC(PyTypeObject *o) Return true if the type object includes support for the cycle detector; this @@ -169,12 +195,14 @@ Type Objects before initialization) and should be paired with :c:func:`PyObject_Free` in :c:member:`~PyTypeObject.tp_free`. + .. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type object. Creates a new instance using the type's :c:member:`~PyTypeObject.tp_alloc` slot and returns the resulting object. + .. c:function:: int PyType_Ready(PyTypeObject *type) Finalize a type object. This should be called on all type objects to finish @@ -191,6 +219,7 @@ Type Objects GC protocol itself by at least implementing the :c:member:`~PyTypeObject.tp_traverse` handle. + .. c:function:: PyObject* PyType_GetName(PyTypeObject *type) Return the type's name. Equivalent to getting the type's @@ -198,6 +227,7 @@ Type Objects .. versionadded:: 3.11 + .. c:function:: PyObject* PyType_GetQualName(PyTypeObject *type) Return the type's qualified name. Equivalent to getting the @@ -213,6 +243,7 @@ Type Objects .. versionadded:: 3.13 + .. c:function:: PyObject* PyType_GetModuleName(PyTypeObject *type) Return the type's module name. Equivalent to getting the @@ -220,6 +251,7 @@ Type Objects .. versionadded:: 3.13 + .. c:function:: void* PyType_GetSlot(PyTypeObject *type, int slot) Return the function pointer stored in the given slot. If the @@ -236,6 +268,7 @@ Type Objects :c:func:`PyType_GetSlot` can now accept all types. Previously, it was limited to :ref:`heap types `. + .. c:function:: PyObject* PyType_GetModule(PyTypeObject *type) Return the module object associated with the given type when the type was @@ -250,11 +283,12 @@ Type Objects ``Py_TYPE(self)`` may be a *subclass* of the intended class, and subclasses are not necessarily defined in the same module as their superclass. See :c:type:`PyCMethod` to get the class that defines the method. - See :c:func:`PyType_GetModuleByDef` for cases when :c:type:`!PyCMethod` cannot - be used. + See :c:func:`PyType_GetModuleByToken` for cases when :c:type:`!PyCMethod` + cannot be used. .. versionadded:: 3.9 + .. c:function:: void* PyType_GetModuleState(PyTypeObject *type) Return the state of the module object associated with the given type. @@ -269,10 +303,11 @@ Type Objects .. versionadded:: 3.9 -.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) - Find the first superclass whose module was created from - the given :c:type:`PyModuleDef` *def*, and return that module. +.. c:function:: PyObject* PyType_GetModuleByToken(PyTypeObject *type, const void *mod_token) + + Find the first superclass whose module has the given + :ref:`module token `, and return that module. If no module is found, raises a :py:class:`TypeError` and returns ``NULL``. @@ -282,16 +317,34 @@ Type Objects and other places where a method's defining class cannot be passed using the :c:type:`PyCMethod` calling convention. + .. versionadded:: next + + +.. c:function:: PyObject* PyType_GetModuleByDef(PyTypeObject *type, struct PyModuleDef *def) + + Find the first superclass whose module was created from the given + :c:type:`PyModuleDef` *def*, or whose :ref:`module token ` + is equal to *def*, and return that module. + + Note that modules created from a :c:type:`PyModuleDef` always have their + token set to the :c:type:`PyModuleDef`'s address. + In other words, this function is equivalent to + :c:func:`PyType_GetModuleByToken`, except that it: + + - returns a borrowed reference, and + - has a non-``void*`` argument type (which is a cosmetic difference in C). + The returned reference is :term:`borrowed ` from *type*, and will be valid as long as you hold a reference to *type*. Do not release it with :c:func:`Py_DECREF` or similar. .. versionadded:: 3.11 -.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) + +.. c:function:: int PyType_GetBaseByToken(PyTypeObject *type, void *tp_token, PyTypeObject **result) Find the first superclass in *type*'s :term:`method resolution order` whose - :c:macro:`Py_tp_token` token is equal to the given one. + :c:macro:`Py_tp_token` token is equal to *tp_token*. * If found, set *\*result* to a new :term:`strong reference` to it and return ``1``. @@ -302,10 +355,11 @@ Type Objects The *result* argument may be ``NULL``, in which case *\*result* is not set. Use this if you need only the return value. - The *token* argument may not be ``NULL``. + The *tp_token* argument may not be ``NULL``. .. versionadded:: 3.14 + .. c:function:: int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) Attempt to assign a version tag to the given type. @@ -316,6 +370,16 @@ Type Objects .. versionadded:: 3.12 +.. c:function:: int PyType_SUPPORTS_WEAKREFS(PyTypeObject *type) + + Return true if instances of *type* support creating weak references, false + otherwise. This function always succeeds. *type* must not be ``NULL``. + + .. seealso:: + * :ref:`weakrefobjects` + * :py:mod:`weakref` + + Creating Heap-Allocated Types ............................. @@ -336,8 +400,8 @@ The following functions and structs are used to create The *bases* argument can be used to specify base classes; it can either be only one class or a tuple of classes. - If *bases* is ``NULL``, the *Py_tp_bases* slot is used instead. - If that also is ``NULL``, the *Py_tp_base* slot is used instead. + If *bases* is ``NULL``, the :c:data:`Py_tp_bases` slot is used instead. + If that also is ``NULL``, the :c:data:`Py_tp_base` slot is used instead. If that also is ``NULL``, the new type derives from :class:`object`. The *module* argument can be used to record the module in which the new @@ -364,6 +428,7 @@ The following functions and structs are used to create .. versionadded:: 3.12 + .. c:function:: PyObject* PyType_FromModuleAndSpec(PyObject *module, PyType_Spec *spec, PyObject *bases) Equivalent to ``PyType_FromMetaclass(NULL, module, spec, bases)``. @@ -390,6 +455,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: PyObject* PyType_FromSpecWithBases(PyType_Spec *spec, PyObject *bases) Equivalent to ``PyType_FromMetaclass(NULL, NULL, spec, bases)``. @@ -411,6 +477,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: PyObject* PyType_FromSpec(PyType_Spec *spec) Equivalent to ``PyType_FromMetaclass(NULL, NULL, spec, NULL)``. @@ -431,6 +498,7 @@ The following functions and structs are used to create Creating classes whose metaclass overrides :c:member:`~PyTypeObject.tp_new` is no longer allowed. + .. c:function:: int PyType_Freeze(PyTypeObject *type) Make a type immutable: set the :c:macro:`Py_TPFLAGS_IMMUTABLETYPE` flag. @@ -539,9 +607,9 @@ The following functions and structs are used to create :c:type:`PyAsyncMethods` with an added ``Py_`` prefix. For example, use: - * ``Py_tp_dealloc`` to set :c:member:`PyTypeObject.tp_dealloc` - * ``Py_nb_add`` to set :c:member:`PyNumberMethods.nb_add` - * ``Py_sq_length`` to set :c:member:`PySequenceMethods.sq_length` + * :c:data:`Py_tp_dealloc` to set :c:member:`PyTypeObject.tp_dealloc` + * :c:data:`Py_nb_add` to set :c:member:`PyNumberMethods.nb_add` + * :c:data:`Py_sq_length` to set :c:member:`PySequenceMethods.sq_length` An additional slot is supported that does not correspond to a :c:type:`!PyTypeObject` struct field: @@ -560,7 +628,7 @@ The following functions and structs are used to create If it is not possible to switch to a ``MANAGED`` flag (for example, for vectorcall or to support Python older than 3.12), specify the - offset in :c:member:`Py_tp_members `. + offset in :c:data:`Py_tp_members`. See :ref:`PyMemberDef documentation ` for details. @@ -587,8 +655,8 @@ The following functions and structs are used to create under the :ref:`limited API `. .. versionchanged:: 3.14 - The field :c:member:`~PyTypeObject.tp_vectorcall` can now set - using ``Py_tp_vectorcall``. See the field's documentation + The field :c:member:`~PyTypeObject.tp_vectorcall` can now be set + using :c:data:`Py_tp_vectorcall`. See the field's documentation for details. .. c:member:: void *pfunc @@ -598,10 +666,11 @@ The following functions and structs are used to create *pfunc* values may not be ``NULL``, except for the following slots: - * ``Py_tp_doc`` + * :c:data:`Py_tp_doc` * :c:data:`Py_tp_token` (for clarity, prefer :c:data:`Py_TP_USE_SPEC` rather than ``NULL``) + .. c:macro:: Py_tp_token A :c:member:`~PyType_Slot.slot` that records a static memory layout ID diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 59c26a713e4..49fe02d919d 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -676,6 +676,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_dealloc + .. corresponding-type-slot:: Py_tp_dealloc + A pointer to the instance destructor function. The function signature is:: void tp_dealloc(PyObject *self); @@ -860,6 +862,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getattrfunc PyTypeObject.tp_getattr + .. corresponding-type-slot:: Py_tp_getattr + An optional pointer to the get-attribute-string function. This field is deprecated. When it is defined, it should point to a function @@ -877,6 +881,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: setattrfunc PyTypeObject.tp_setattr + .. corresponding-type-slot:: Py_tp_setattr + An optional pointer to the function for setting and deleting attributes. This field is deprecated. When it is defined, it should point to a function @@ -909,6 +915,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: reprfunc PyTypeObject.tp_repr + .. corresponding-type-slot:: Py_tp_repr + .. index:: pair: built-in function; repr An optional pointer to a function that implements the built-in function @@ -974,6 +982,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: hashfunc PyTypeObject.tp_hash + .. corresponding-type-slot:: Py_tp_hash + .. index:: pair: built-in function; hash An optional pointer to a function that implements the built-in function @@ -1015,6 +1025,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: ternaryfunc PyTypeObject.tp_call + .. corresponding-type-slot:: Py_tp_call + An optional pointer to a function that implements calling the object. This should be ``NULL`` if the object is not callable. The signature is the same as for :c:func:`PyObject_Call`:: @@ -1028,6 +1040,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: reprfunc PyTypeObject.tp_str + .. corresponding-type-slot:: Py_tp_str + An optional pointer to a function that implements the built-in operation :func:`str`. (Note that :class:`str` is a type now, and :func:`str` calls the constructor for that type. This constructor calls :c:func:`PyObject_Str` to do @@ -1053,6 +1067,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getattrofunc PyTypeObject.tp_getattro + .. corresponding-type-slot:: Py_tp_getattro + An optional pointer to the get-attribute function. The signature is the same as for :c:func:`PyObject_GetAttr`:: @@ -1077,6 +1093,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: setattrofunc PyTypeObject.tp_setattro + .. corresponding-type-slot:: Py_tp_setattro + An optional pointer to the function for setting and deleting attributes. The signature is the same as for :c:func:`PyObject_SetAttr`:: @@ -1333,8 +1351,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_BASE_EXC_SUBCLASS .. c:macro:: Py_TPFLAGS_TYPE_SUBCLASS - These flags are used by functions such as - :c:func:`PyLong_Check` to quickly determine if a type is a subclass + Functions such as :c:func:`PyLong_Check` will call :c:func:`PyType_FastSubclass` + with one of these flags to quickly determine if a type is a subclass of a built-in type; such specific checks are faster than a generic check, like :c:func:`PyObject_IsInstance`. Custom types that inherit from built-ins should have their :c:member:`~PyTypeObject.tp_flags` @@ -1475,6 +1493,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: const char* PyTypeObject.tp_doc + .. corresponding-type-slot:: Py_tp_doc + An optional pointer to a NUL-terminated C string giving the docstring for this type object. This is exposed as the :attr:`~type.__doc__` attribute on the type and instances of the type. @@ -1486,6 +1506,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: traverseproc PyTypeObject.tp_traverse + .. corresponding-type-slot:: Py_tp_traverse + An optional pointer to a traversal function for the garbage collector. This is only used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: @@ -1582,6 +1604,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_clear + .. corresponding-type-slot:: Py_tp_clear + An optional pointer to a clear function. The signature is:: int tp_clear(PyObject *); @@ -1730,6 +1754,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: richcmpfunc PyTypeObject.tp_richcompare + .. corresponding-type-slot:: Py_tp_richcompare + An optional pointer to the rich comparison function, whose signature is:: PyObject *tp_richcompare(PyObject *self, PyObject *other, int op); @@ -1832,6 +1858,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: getiterfunc PyTypeObject.tp_iter + .. corresponding-type-slot:: Py_tp_iter + An optional pointer to a function that returns an :term:`iterator` for the object. Its presence normally signals that the instances of this type are :term:`iterable` (although sequences may be iterable without this function). @@ -1847,6 +1875,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: iternextfunc PyTypeObject.tp_iternext + .. corresponding-type-slot:: Py_tp_iternext + An optional pointer to a function that returns the next item in an :term:`iterator`. The signature is:: @@ -1870,6 +1900,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyMethodDef* PyTypeObject.tp_methods + .. corresponding-type-slot:: Py_tp_methods + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyMethodDef` structures, declaring regular methods of this type. @@ -1884,6 +1916,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyMemberDef* PyTypeObject.tp_members + .. corresponding-type-slot:: Py_tp_members + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyMemberDef` structures, declaring regular data members (fields or slots) of instances of this type. @@ -1899,6 +1933,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: struct PyGetSetDef* PyTypeObject.tp_getset + .. corresponding-type-slot:: Py_tp_getset + An optional pointer to a static ``NULL``-terminated array of :c:type:`PyGetSetDef` structures, declaring computed attributes of instances of this type. @@ -1913,6 +1949,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: PyTypeObject* PyTypeObject.tp_base + .. corresponding-type-slot:: Py_tp_base + An optional pointer to a base type from which type properties are inherited. At this level, only single inheritance is supported; multiple inheritance require dynamically creating a type object by calling the metatype. @@ -1985,6 +2023,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: descrgetfunc PyTypeObject.tp_descr_get + .. corresponding-type-slot:: Py_tp_descr_get + An optional pointer to a "descriptor get" function. The function signature is:: @@ -2000,6 +2040,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: descrsetfunc PyTypeObject.tp_descr_set + .. corresponding-type-slot:: Py_tp_descr_set + An optional pointer to a function for setting and deleting a descriptor's value. @@ -2060,6 +2102,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: initproc PyTypeObject.tp_init + .. corresponding-type-slot:: Py_tp_init + An optional pointer to an instance initialization function. This function corresponds to the :meth:`~object.__init__` method of classes. Like @@ -2095,6 +2139,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: allocfunc PyTypeObject.tp_alloc + .. corresponding-type-slot:: Py_tp_alloc + An optional pointer to an instance allocation function. The function signature is:: @@ -2118,6 +2164,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: newfunc PyTypeObject.tp_new + .. corresponding-type-slot:: Py_tp_new + An optional pointer to an instance creation function. The function signature is:: @@ -2157,6 +2205,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: freefunc PyTypeObject.tp_free + .. corresponding-type-slot:: Py_tp_free + An optional pointer to an instance deallocation function. Its signature is:: void tp_free(void *self); @@ -2186,6 +2236,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_is_gc + .. corresponding-type-slot:: Py_tp_is_gc + An optional pointer to a function called by the garbage collector. The garbage collector needs to know whether a particular object is collectible @@ -2214,12 +2266,14 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: PyObject* PyTypeObject.tp_bases + .. corresponding-type-slot:: Py_tp_bases + Tuple of base types. This field should be set to ``NULL`` and treated as read-only. Python will fill it in when the type is :c:func:`initialized `. - For dynamically created classes, the ``Py_tp_bases`` + For dynamically created classes, the :c:data:`Py_tp_bases` :c:type:`slot ` can be used instead of the *bases* argument of :c:func:`PyType_FromSpecWithBases`. The argument form is preferred. @@ -2294,6 +2348,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_del + .. corresponding-type-slot:: Py_tp_del + This field is deprecated. Use :c:member:`~PyTypeObject.tp_finalize` instead. @@ -2308,6 +2364,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_finalize + .. corresponding-type-slot:: Py_tp_finalize + An optional pointer to an instance finalization function. This is the C implementation of the :meth:`~object.__del__` special method. Its signature is:: @@ -2466,6 +2524,8 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall + .. corresponding-type-slot:: Py_tp_vectorcall + A :ref:`vectorcall function ` to use for calls of this type object (rather than instances). In other words, ``tp_vectorcall`` can be used to optimize ``type.__call__``, @@ -2631,42 +2691,148 @@ Number Object Structures Python 3.0.1. .. c:member:: binaryfunc PyNumberMethods.nb_add + + .. corresponding-type-slot:: Py_nb_add + .. c:member:: binaryfunc PyNumberMethods.nb_subtract + + .. corresponding-type-slot:: Py_nb_subtract + .. c:member:: binaryfunc PyNumberMethods.nb_multiply + + .. corresponding-type-slot:: Py_nb_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_remainder + + .. corresponding-type-slot:: Py_nb_remainder + .. c:member:: binaryfunc PyNumberMethods.nb_divmod + + .. corresponding-type-slot:: Py_nb_divmod + .. c:member:: ternaryfunc PyNumberMethods.nb_power + + .. corresponding-type-slot:: Py_nb_power + .. c:member:: unaryfunc PyNumberMethods.nb_negative + + .. corresponding-type-slot:: Py_nb_negative + .. c:member:: unaryfunc PyNumberMethods.nb_positive + + .. corresponding-type-slot:: Py_nb_positive + .. c:member:: unaryfunc PyNumberMethods.nb_absolute + + .. corresponding-type-slot:: Py_nb_absolute + .. c:member:: inquiry PyNumberMethods.nb_bool + + .. corresponding-type-slot:: Py_nb_bool + .. c:member:: unaryfunc PyNumberMethods.nb_invert + + .. corresponding-type-slot:: Py_nb_invert + .. c:member:: binaryfunc PyNumberMethods.nb_lshift + + .. corresponding-type-slot:: Py_nb_lshift + .. c:member:: binaryfunc PyNumberMethods.nb_rshift + + .. corresponding-type-slot:: Py_nb_rshift + .. c:member:: binaryfunc PyNumberMethods.nb_and + + .. corresponding-type-slot:: Py_nb_and + .. c:member:: binaryfunc PyNumberMethods.nb_xor + + .. corresponding-type-slot:: Py_nb_xor + .. c:member:: binaryfunc PyNumberMethods.nb_or + + .. corresponding-type-slot:: Py_nb_or + .. c:member:: unaryfunc PyNumberMethods.nb_int + + .. corresponding-type-slot:: Py_nb_int + .. c:member:: void *PyNumberMethods.nb_reserved + .. c:member:: unaryfunc PyNumberMethods.nb_float + + .. corresponding-type-slot:: Py_nb_float + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_add + + .. corresponding-type-slot:: Py_nb_inplace_add + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_subtract + + .. corresponding-type-slot:: Py_nb_inplace_subtract + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_multiply + + .. corresponding-type-slot:: Py_nb_inplace_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_remainder + + .. corresponding-type-slot:: Py_nb_inplace_remainder + .. c:member:: ternaryfunc PyNumberMethods.nb_inplace_power + + .. corresponding-type-slot:: Py_nb_inplace_power + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_lshift + + .. corresponding-type-slot:: Py_nb_inplace_lshift + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_rshift + + .. corresponding-type-slot:: Py_nb_inplace_rshift + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_and + + .. corresponding-type-slot:: Py_nb_inplace_and + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_xor + + .. corresponding-type-slot:: Py_nb_inplace_xor + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_or + + .. corresponding-type-slot:: Py_nb_inplace_or + .. c:member:: binaryfunc PyNumberMethods.nb_floor_divide + + .. corresponding-type-slot:: Py_nb_floor_divide + .. c:member:: binaryfunc PyNumberMethods.nb_true_divide + + .. corresponding-type-slot:: Py_nb_true_divide + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_floor_divide + + .. corresponding-type-slot:: Py_nb_inplace_floor_divide + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_true_divide + + .. corresponding-type-slot:: Py_nb_inplace_true_divide + .. c:member:: unaryfunc PyNumberMethods.nb_index + + .. corresponding-type-slot:: Py_nb_index + .. c:member:: binaryfunc PyNumberMethods.nb_matrix_multiply + + .. corresponding-type-slot:: Py_nb_matrix_multiply + .. c:member:: binaryfunc PyNumberMethods.nb_inplace_matrix_multiply + .. corresponding-type-slot:: Py_nb_inplace_matrix_multiply + + .. _mapping-structs: @@ -2683,12 +2849,16 @@ Mapping Object Structures .. c:member:: lenfunc PyMappingMethods.mp_length + .. corresponding-type-slot:: Py_mp_length + This function is used by :c:func:`PyMapping_Size` and :c:func:`PyObject_Size`, and has the same signature. This slot may be set to ``NULL`` if the object has no defined length. .. c:member:: binaryfunc PyMappingMethods.mp_subscript + .. corresponding-type-slot:: Py_mp_subscript + This function is used by :c:func:`PyObject_GetItem` and :c:func:`PySequence_GetSlice`, and has the same signature as :c:func:`!PyObject_GetItem`. This slot must be filled for the @@ -2697,6 +2867,8 @@ Mapping Object Structures .. c:member:: objobjargproc PyMappingMethods.mp_ass_subscript + .. corresponding-type-slot:: Py_mp_ass_subscript + This function is used by :c:func:`PyObject_SetItem`, :c:func:`PyObject_DelItem`, :c:func:`PySequence_SetSlice` and :c:func:`PySequence_DelSlice`. It has the same signature as @@ -2720,6 +2892,8 @@ Sequence Object Structures .. c:member:: lenfunc PySequenceMethods.sq_length + .. corresponding-type-slot:: Py_sq_length + This function is used by :c:func:`PySequence_Size` and :c:func:`PyObject_Size`, and has the same signature. It is also used for handling negative indices via the :c:member:`~PySequenceMethods.sq_item` @@ -2727,18 +2901,24 @@ Sequence Object Structures .. c:member:: binaryfunc PySequenceMethods.sq_concat + .. corresponding-type-slot:: Py_sq_concat + This function is used by :c:func:`PySequence_Concat` and has the same signature. It is also used by the ``+`` operator, after trying the numeric addition via the :c:member:`~PyNumberMethods.nb_add` slot. .. c:member:: ssizeargfunc PySequenceMethods.sq_repeat + .. corresponding-type-slot:: Py_sq_repeat + This function is used by :c:func:`PySequence_Repeat` and has the same signature. It is also used by the ``*`` operator, after trying numeric multiplication via the :c:member:`~PyNumberMethods.nb_multiply` slot. .. c:member:: ssizeargfunc PySequenceMethods.sq_item + .. corresponding-type-slot:: Py_sq_item + This function is used by :c:func:`PySequence_GetItem` and has the same signature. It is also used by :c:func:`PyObject_GetItem`, after trying the subscription via the :c:member:`~PyMappingMethods.mp_subscript` slot. @@ -2752,6 +2932,8 @@ Sequence Object Structures .. c:member:: ssizeobjargproc PySequenceMethods.sq_ass_item + .. corresponding-type-slot:: Py_sq_ass_item + This function is used by :c:func:`PySequence_SetItem` and has the same signature. It is also used by :c:func:`PyObject_SetItem` and :c:func:`PyObject_DelItem`, after trying the item assignment and deletion @@ -2761,6 +2943,8 @@ Sequence Object Structures .. c:member:: objobjproc PySequenceMethods.sq_contains + .. corresponding-type-slot:: Py_sq_contains + This function may be used by :c:func:`PySequence_Contains` and has the same signature. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_Contains` simply traverses the sequence until it @@ -2768,6 +2952,8 @@ Sequence Object Structures .. c:member:: binaryfunc PySequenceMethods.sq_inplace_concat + .. corresponding-type-slot:: Py_sq_inplace_concat + This function is used by :c:func:`PySequence_InPlaceConcat` and has the same signature. It should modify its first operand, and return it. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_InPlaceConcat` @@ -2777,6 +2963,8 @@ Sequence Object Structures .. c:member:: ssizeargfunc PySequenceMethods.sq_inplace_repeat + .. corresponding-type-slot:: Py_sq_inplace_repeat + This function is used by :c:func:`PySequence_InPlaceRepeat` and has the same signature. It should modify its first operand, and return it. This slot may be left to ``NULL``, in this case :c:func:`!PySequence_InPlaceRepeat` @@ -2802,6 +2990,8 @@ Buffer Object Structures .. c:member:: getbufferproc PyBufferProcs.bf_getbuffer + .. corresponding-type-slot:: Py_bf_getbuffer + The signature of this function is:: int (PyObject *exporter, Py_buffer *view, int flags); @@ -2851,6 +3041,8 @@ Buffer Object Structures .. c:member:: releasebufferproc PyBufferProcs.bf_releasebuffer + .. corresponding-type-slot:: Py_bf_releasebuffer + The signature of this function is:: void (PyObject *exporter, Py_buffer *view); @@ -2905,6 +3097,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_await + .. corresponding-type-slot:: Py_am_await + The signature of this function is:: PyObject *am_await(PyObject *self); @@ -2916,6 +3110,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_aiter + .. corresponding-type-slot:: Py_am_aiter + The signature of this function is:: PyObject *am_aiter(PyObject *self); @@ -2928,6 +3124,8 @@ Async Object Structures .. c:member:: unaryfunc PyAsyncMethods.am_anext + .. corresponding-type-slot:: Py_am_anext + The signature of this function is:: PyObject *am_anext(PyObject *self); @@ -2938,6 +3136,8 @@ Async Object Structures .. c:member:: sendfunc PyAsyncMethods.am_send + .. corresponding-type-slot:: Py_am_send + The signature of this function is:: PySendResult am_send(PyObject *self, PyObject *arg, PyObject **result); diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 22b0a6aff6e..ca7c8bb11a5 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -321,12 +321,22 @@ These APIs can be used to work with surrogates: Check if *ch* is a low surrogate (``0xDC00 <= ch <= 0xDFFF``). +.. c:function:: Py_UCS4 Py_UNICODE_HIGH_SURROGATE(Py_UCS4 ch) + + Return the high UTF-16 surrogate (``0xD800`` to ``0xDBFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + +.. c:function:: Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) + + Return the low UTF-16 surrogate (``0xDC00`` to ``0xDFFF``) for a Unicode + code point in the range ``[0x10000; 0x10FFFF]``. + .. c:function:: Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) Join two surrogate code points and return a single :c:type:`Py_UCS4` value. *high* and *low* are respectively the leading and trailing surrogates in a - surrogate pair. *high* must be in the range [0xD800; 0xDBFF] and *low* must - be in the range [0xDC00; 0xDFFF]. + surrogate pair. *high* must be in the range ``[0xD800; 0xDBFF]`` and *low* must + be in the range ``[0xDC00; 0xDFFF]``. Creating and accessing Unicode strings diff --git a/Doc/c-api/veryhigh.rst b/Doc/c-api/veryhigh.rst index ee0595a9e08..7eb9f0b54ab 100644 --- a/Doc/c-api/veryhigh.rst +++ b/Doc/c-api/veryhigh.rst @@ -13,8 +13,9 @@ the interpreter. Several of these functions accept a start symbol from the grammar as a parameter. The available start symbols are :c:data:`Py_eval_input`, -:c:data:`Py_file_input`, and :c:data:`Py_single_input`. These are described -following the functions which accept them as parameters. +:c:data:`Py_file_input`, :c:data:`Py_single_input`, and +:c:data:`Py_func_type_input`. These are described following the functions +which accept them as parameters. Note also that several of these functions take :c:expr:`FILE*` parameters. One particular issue which needs to be handled carefully is that the :c:type:`FILE` @@ -99,6 +100,20 @@ the same library that the Python runtime is using. Otherwise, Python may not handle script file with LF line ending correctly. +.. c:function:: int PyRun_InteractiveOneObject(FILE *fp, PyObject *filename, PyCompilerFlags *flags) + + Read and execute a single statement from a file associated with an + interactive device according to the *flags* argument. The user will be + prompted using ``sys.ps1`` and ``sys.ps2``. *filename* must be a Python + :class:`str` object. + + Returns ``0`` when the input was + executed successfully, ``-1`` if there was an exception, or an error code + from the :file:`errcode.h` include file distributed as part of Python if + there was a parse error. (Note that :file:`errcode.h` is not included by + :file:`Python.h`, so must be included specifically if needed.) + + .. c:function:: int PyRun_InteractiveOne(FILE *fp, const char *filename) This is a simplified interface to :c:func:`PyRun_InteractiveOneFlags` below, @@ -107,17 +122,10 @@ the same library that the Python runtime is using. .. c:function:: int PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags) - Read and execute a single statement from a file associated with an - interactive device according to the *flags* argument. The user will be - prompted using ``sys.ps1`` and ``sys.ps2``. *filename* is decoded from the + Similar to :c:func:`PyRun_InteractiveOneObject`, but *filename* is a + :c:expr:`const char*`, which is decoded from the :term:`filesystem encoding and error handler`. - Returns ``0`` when the input was - executed successfully, ``-1`` if there was an exception, or an error code - from the :file:`errcode.h` include file distributed as part of Python if - there was a parse error. (Note that :file:`errcode.h` is not included by - :file:`Python.h`, so must be included specifically if needed.) - .. c:function:: int PyRun_InteractiveLoop(FILE *fp, const char *filename) @@ -140,7 +148,7 @@ the same library that the Python runtime is using. interpreter prompt is about to become idle and wait for user input from the terminal. The return value is ignored. Overriding this hook can be used to integrate the interpreter's prompt with other - event loops, as done in the :file:`Modules/_tkinter.c` in the + event loops, as done in :file:`Modules/_tkinter.c` in the Python source code. .. versionchanged:: 3.12 @@ -183,8 +191,7 @@ the same library that the Python runtime is using. objects *globals* and *locals* with the compiler flags specified by *flags*. *globals* must be a dictionary; *locals* can be any object that implements the mapping protocol. The parameter *start* specifies - the start symbol and must one of the following: - :c:data:`Py_eval_input`, :c:data:`Py_file_input`, or :c:data:`Py_single_input`. + the start symbol and must one of the :ref:`available start symbols `. Returns the result of executing the code as a Python object, or ``NULL`` if an exception was raised. @@ -233,8 +240,8 @@ the same library that the Python runtime is using. Parse and compile the Python source code in *str*, returning the resulting code object. The start symbol is given by *start*; this can be used to constrain the - code which can be compiled and should be :c:data:`Py_eval_input`, - :c:data:`Py_file_input`, or :c:data:`Py_single_input`. The filename specified by + code which can be compiled and should be :ref:`available start symbols + `. The filename specified by *filename* is used to construct the code object and may appear in tracebacks or :exc:`SyntaxError` exception messages. This returns ``NULL`` if the code cannot be parsed or compiled. @@ -297,32 +304,6 @@ the same library that the Python runtime is using. true on success, false on failure. -.. c:var:: int Py_eval_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for isolated expressions; for use with - :c:func:`Py_CompileString`. - - -.. c:var:: int Py_file_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for sequences of statements as read - from a file or other source; for use with :c:func:`Py_CompileString`. This is - the symbol to use when compiling arbitrarily long Python source code. - - -.. c:var:: int Py_single_input - - .. index:: single: Py_CompileString (C function) - - The start symbol from the Python grammar for a single statement; for use with - :c:func:`Py_CompileString`. This is the symbol used for the interactive - interpreter loop. - - .. c:struct:: PyCompilerFlags This is the structure used to hold compiler flags. In cases where code is only @@ -366,3 +347,92 @@ the same library that the Python runtime is using. as :c:macro:`CO_FUTURE_ANNOTATIONS` to enable features normally selectable using :ref:`future statements `. See :ref:`c_codeobject_flags` for a complete list. + + +.. _start-symbols: + +Available start symbols +^^^^^^^^^^^^^^^^^^^^^^^ + + +.. c:var:: int Py_eval_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for isolated expressions; for use with + :c:func:`Py_CompileString`. + + +.. c:var:: int Py_file_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for sequences of statements as read + from a file or other source; for use with :c:func:`Py_CompileString`. This is + the symbol to use when compiling arbitrarily long Python source code. + + +.. c:var:: int Py_single_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for a single statement; for use with + :c:func:`Py_CompileString`. This is the symbol used for the interactive + interpreter loop. + + +.. c:var:: int Py_func_type_input + + .. index:: single: Py_CompileString (C function) + + The start symbol from the Python grammar for a function type; for use with + :c:func:`Py_CompileString`. This is used to parse "signature type comments" + from :pep:`484`. + + This requires the :c:macro:`PyCF_ONLY_AST` flag to be set. + + .. seealso:: + * :py:class:`ast.FunctionType` + * :pep:`484` + + .. versionadded:: 3.8 + + +Stack Effects +^^^^^^^^^^^^^ + +.. seealso:: + :py:func:`dis.stack_effect` + + +.. c:macro:: PY_INVALID_STACK_EFFECT + + Sentinel value representing an invalid stack effect. + + This is currently equivalent to ``INT_MAX``. + + .. versionadded:: 3.8 + + +.. c:function:: int PyCompile_OpcodeStackEffect(int opcode, int oparg) + + Compute the stack effect of *opcode* with argument *oparg*. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. + + .. versionadded:: 3.4 + + +.. c:function:: int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump) + + Similar to :c:func:`PyCompile_OpcodeStackEffect`, but don't include the + stack effect of jumping if *jump* is zero. + + If *jump* is ``0``, this will not include the stack effect of jumping, but + if *jump* is ``1`` or ``-1``, this will include it. + + On success, this function returns the stack effect; on failure, this + returns :c:macro:`PY_INVALID_STACK_EFFECT`. + + .. versionadded:: 3.8 diff --git a/Doc/c-api/weakref.rst b/Doc/c-api/weakref.rst index 14ec9d951c4..db6ae0a9d4e 100644 --- a/Doc/c-api/weakref.rst +++ b/Doc/c-api/weakref.rst @@ -19,7 +19,14 @@ as much as it can. .. c:function:: int PyWeakref_CheckRef(PyObject *ob) - Return non-zero if *ob* is a reference object. This function always succeeds. + Return non-zero if *ob* is a reference object or a subclass of the reference + type. This function always succeeds. + + +.. c:function:: int PyWeakref_CheckRefExact(PyObject *ob) + + Return non-zero if *ob* is a reference object, but not a subclass of the + reference type. This function always succeeds. .. c:function:: int PyWeakref_CheckProxy(PyObject *ob) @@ -38,6 +45,10 @@ as much as it can. weakly referenceable object, or if *callback* is not callable, ``None``, or ``NULL``, this will return ``NULL`` and raise :exc:`TypeError`. + .. seealso:: + :c:func:`PyType_SUPPORTS_WEAKREFS` for checking if *ob* is weakly + referenceable. + .. c:function:: PyObject* PyWeakref_NewProxy(PyObject *ob, PyObject *callback) @@ -50,6 +61,10 @@ as much as it can. is not a weakly referenceable object, or if *callback* is not callable, ``None``, or ``NULL``, this will return ``NULL`` and raise :exc:`TypeError`. + .. seealso:: + :c:func:`PyType_SUPPORTS_WEAKREFS` for checking if *ob* is weakly + referenceable. + .. c:function:: int PyWeakref_GetRef(PyObject *ref, PyObject **pobj) diff --git a/Doc/conf.py b/Doc/conf.py index f1dda10052e..a4275835059 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -226,9 +226,6 @@ # Temporary undocumented names. # In future this list must be empty. nitpick_ignore += [ - # Undocumented public C macros - ('c:macro', 'Py_BUILD_ASSERT'), - ('c:macro', 'Py_BUILD_ASSERT_EXPR'), # Do not error nit-picky mode builds when _SubParsersAction.add_parser cannot # be resolved, as the method is currently undocumented. For context, see # https://github.com/python/cpython/pull/103289. @@ -364,7 +361,7 @@ # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). -_stdauthor = 'Guido van Rossum and the Python development team' +_stdauthor = 'The Python development team' latex_documents = [ ('c-api/index', 'c-api.tex', 'The Python/C API', _stdauthor, 'manual'), ( diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 48f4f4919e8..64399f6ab1f 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1472,6 +1472,9 @@ PyModule_Create2:PyObject*::+1: PyModule_Create2:PyModuleDef*:def:: PyModule_Create2:int:module_api_version:: +PyModule_Exec:int::: +PyModule_ExecDef:PyObject*:module:0: + PyModule_ExecDef:int::: PyModule_ExecDef:PyObject*:module:0: PyModule_ExecDef:PyModuleDef*:def:: @@ -1485,6 +1488,10 @@ PyModule_FromDefAndSpec2:PyModuleDef*:def:: PyModule_FromDefAndSpec2:PyObject*:spec:0: PyModule_FromDefAndSpec2:int:module_api_version:: +PyModule_FromSlotsAndSpec:PyObject*::+1: +PyModule_FromSlotsAndSpec:const PyModuleDef_Slot *:slots:: +PyModule_FromSlotsAndSpec:PyObject*:spec:0: + PyModule_GetDef:PyModuleDef*::0: PyModule_GetDef:PyObject*:module:0: @@ -1506,6 +1513,14 @@ PyModule_GetNameObject:PyObject*:module:0: PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: +PyModule_GetStateSize:int::: +PyModule_GetStateSize:PyObject*:module:0: +PyModule_GetToken:Py_ssize_t**:result:: + +PyModule_GetToken:int::: +PyModule_GetToken:PyObject*:module:0: +PyModule_GetToken:void**:result:: + PyModule_New:PyObject*::+1: PyModule_New:char*:name:: @@ -2412,6 +2427,10 @@ PyType_GetFlags:PyTypeObject*:type:0: PyType_GetName:PyObject*::+1: PyType_GetName:PyTypeObject*:type:0: +PyType_GetModuleByToken:PyObject*::+1: +PyType_GetModuleByToken:PyTypeObject*:type:0: +PyType_GetModuleByToken:PyModuleDef*:def:: + PyType_GetModuleByDef:PyObject*::0: PyType_GetModuleByDef:PyTypeObject*:type:0: PyType_GetModuleByDef:PyModuleDef*:def:: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 7ad5f3ecfab..9c5fdcefaf8 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -1,7 +1,21 @@ role,name,added,ifdef_note,struct_abi_kind +macro,METH_CLASS,3.2,, +macro,METH_COEXIST,3.2,, +macro,METH_FASTCALL,3.7,, +macro,METH_METHOD,3.7,, +macro,METH_NOARGS,3.2,, +macro,METH_O,3.2,, +macro,METH_STATIC,3.2,, +macro,METH_VARARGS,3.2,, macro,PY_VECTORCALL_ARGUMENTS_OFFSET,3.12,, type,PyABIInfo,3.15,,full-abi func,PyABIInfo_Check,3.15,, +macro,PyABIInfo_DEFAULT_ABI_VERSION,3.15,, +macro,PyABIInfo_DEFAULT_FLAGS,3.15,, +macro,PyABIInfo_FREETHREADED,3.15,, +macro,PyABIInfo_FREETHREADING_AGNOSTIC,3.15,, +macro,PyABIInfo_GIL,3.15,, +macro,PyABIInfo_STABLE,3.15,, macro,PyABIInfo_VAR,3.15,, func,PyAIter_Check,3.10,, func,PyArg_Parse,3.2,, @@ -11,6 +25,26 @@ func,PyArg_UnpackTuple,3.2,, func,PyArg_VaParse,3.2,, func,PyArg_VaParseTupleAndKeywords,3.2,, func,PyArg_ValidateKeywordArguments,3.2,, +macro,PyBUF_ANY_CONTIGUOUS,3.11,, +macro,PyBUF_CONTIG,3.11,, +macro,PyBUF_CONTIG_RO,3.11,, +macro,PyBUF_C_CONTIGUOUS,3.11,, +macro,PyBUF_FORMAT,3.11,, +macro,PyBUF_FULL,3.11,, +macro,PyBUF_FULL_RO,3.11,, +macro,PyBUF_F_CONTIGUOUS,3.11,, +macro,PyBUF_INDIRECT,3.11,, +macro,PyBUF_MAX_NDIM,3.11,, +macro,PyBUF_ND,3.11,, +macro,PyBUF_READ,3.11,, +macro,PyBUF_RECORDS,3.11,, +macro,PyBUF_RECORDS_RO,3.11,, +macro,PyBUF_SIMPLE,3.11,, +macro,PyBUF_STRIDED,3.11,, +macro,PyBUF_STRIDED_RO,3.11,, +macro,PyBUF_STRIDES,3.11,, +macro,PyBUF_WRITABLE,3.11,, +macro,PyBUF_WRITE,3.11,, data,PyBaseObject_Type,3.2,, func,PyBool_FromLong,3.2,, data,PyBool_Type,3.2,, @@ -126,6 +160,7 @@ func,PyDict_Merge,3.2,, func,PyDict_MergeFromSeq2,3.2,, func,PyDict_New,3.2,, func,PyDict_Next,3.2,, +func,PyDict_SetDefaultRef,3.15,, func,PyDict_SetItem,3.2,, func,PyDict_SetItemString,3.2,, func,PyDict_Size,3.2,, @@ -391,6 +426,7 @@ func,PyLong_FromUnsignedNativeBytes,3.14,, func,PyLong_FromVoidPtr,3.2,, func,PyLong_GetInfo,3.2,, data,PyLong_Type,3.2,, +macro,PyMODEXPORT_FUNC,3.15,, data,PyMap_Type,3.2,, func,PyMapping_Check,3.2,, func,PyMapping_GetItemString,3.2,, @@ -428,6 +464,7 @@ data,PyMethodDescr_Type,3.2,, type,PyModuleDef,3.2,,full-abi type,PyModuleDef_Base,3.2,,full-abi func,PyModuleDef_Init,3.5,, +type,PyModuleDef_Slot,3.5,,full-abi data,PyModuleDef_Type,3.5,, func,PyModule_Add,3.13,, func,PyModule_AddFunctions,3.7,, @@ -437,8 +474,10 @@ func,PyModule_AddObjectRef,3.10,, func,PyModule_AddStringConstant,3.2,, func,PyModule_AddType,3.10,, func,PyModule_Create2,3.2,, +func,PyModule_Exec,3.15,, func,PyModule_ExecDef,3.7,, func,PyModule_FromDefAndSpec2,3.7,, +func,PyModule_FromSlotsAndSpec,3.15,, func,PyModule_GetDef,3.2,, func,PyModule_GetDict,3.2,, func,PyModule_GetFilename,3.2,, @@ -446,6 +485,8 @@ func,PyModule_GetFilenameObject,3.2,, func,PyModule_GetName,3.2,, func,PyModule_GetNameObject,3.7,, func,PyModule_GetState,3.2,, +func,PyModule_GetStateSize,3.15,, +func,PyModule_GetToken,3.15,, func,PyModule_New,3.2,, func,PyModule_NewObject,3.7,, func,PyModule_SetDocString,3.7,, @@ -704,6 +745,7 @@ func,PyType_GetFlags,3.2,, func,PyType_GetFullyQualifiedName,3.13,, func,PyType_GetModule,3.10,, func,PyType_GetModuleByDef,3.13,, +func,PyType_GetModuleByToken,3.15,, func,PyType_GetModuleName,3.13,, func,PyType_GetModuleState,3.10,, func,PyType_GetName,3.11,, @@ -836,6 +878,14 @@ func,PyWeakref_NewRef,3.2,, data,PyWrapperDescr_Type,3.2,, func,PyWrapper_New,3.2,, data,PyZip_Type,3.2,, +macro,Py_ASNATIVEBYTES_ALLOW_INDEX,3.14,, +macro,Py_ASNATIVEBYTES_BIG_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_DEFAULTS,3.14,, +macro,Py_ASNATIVEBYTES_LITTLE_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_NATIVE_ENDIAN,3.14,, +macro,Py_ASNATIVEBYTES_REJECT_NEGATIVE,3.14,, +macro,Py_ASNATIVEBYTES_UNSIGNED_BUFFER,3.14,, +macro,Py_AUDIT_READ,3.12,, func,Py_AddPendingCall,3.2,, func,Py_AtExit,3.2,, macro,Py_BEGIN_ALLOW_THREADS,3.2,, @@ -866,6 +916,7 @@ func,Py_GetPlatform,3.2,, func,Py_GetRecursionLimit,3.2,, func,Py_GetVersion,3.2,, data,Py_HasFileSystemDefaultEncoding,3.2,, +func,Py_IS_TYPE,3.15,, func,Py_IncRef,3.2,, func,Py_Initialize,3.2,, func,Py_InitializeEx,3.2,, @@ -882,22 +933,147 @@ func,Py_NewInterpreter,3.2,, func,Py_NewRef,3.10,, func,Py_PACK_FULL_VERSION,3.14,, func,Py_PACK_VERSION,3.14,, +macro,Py_READONLY,3.12,, func,Py_REFCNT,3.14,, +macro,Py_RELATIVE_OFFSET,3.12,, func,Py_ReprEnter,3.2,, func,Py_ReprLeave,3.2,, +func,Py_SET_SIZE,3.15,, +func,Py_SIZE,3.15,, func,Py_SetProgramName,3.2,, func,Py_SetPythonHome,3.2,, func,Py_SetRecursionLimit,3.2,, +macro,Py_TPFLAGS_BASETYPE,3.2,, +macro,Py_TPFLAGS_DEFAULT,3.2,, +macro,Py_TPFLAGS_HAVE_GC,3.2,, +macro,Py_TPFLAGS_HAVE_VECTORCALL,3.12,, +macro,Py_TPFLAGS_ITEMS_AT_END,3.12,, +macro,Py_TPFLAGS_METHOD_DESCRIPTOR,3.8,, +macro,Py_TP_USE_SPEC,3.14,, func,Py_TYPE,3.14,, +macro,Py_T_BOOL,3.12,, +macro,Py_T_BYTE,3.12,, +macro,Py_T_CHAR,3.12,, +macro,Py_T_DOUBLE,3.12,, +macro,Py_T_FLOAT,3.12,, +macro,Py_T_INT,3.12,, +macro,Py_T_LONG,3.12,, +macro,Py_T_LONGLONG,3.12,, +macro,Py_T_OBJECT_EX,3.12,, +macro,Py_T_PYSSIZET,3.12,, +macro,Py_T_SHORT,3.12,, +macro,Py_T_STRING,3.12,, +macro,Py_T_STRING_INPLACE,3.12,, +macro,Py_T_UBYTE,3.12,, +macro,Py_T_UINT,3.12,, +macro,Py_T_ULONG,3.12,, +macro,Py_T_ULONGLONG,3.12,, +macro,Py_T_USHORT,3.12,, type,Py_UCS4,3.2,, macro,Py_UNBLOCK_THREADS,3.2,, data,Py_UTF8Mode,3.8,, func,Py_VaBuildValue,3.2,, data,Py_Version,3.11,, func,Py_XNewRef,3.10,, +macro,Py_am_aiter,3.5,, +macro,Py_am_anext,3.5,, +macro,Py_am_await,3.5,, +macro,Py_am_send,3.10,, +macro,Py_bf_getbuffer,3.11,, +macro,Py_bf_releasebuffer,3.11,, type,Py_buffer,3.11,,full-abi type,Py_intptr_t,3.2,, +macro,Py_mod_abi,3.15,, +macro,Py_mod_create,3.5,, +macro,Py_mod_doc,3.15,, +macro,Py_mod_exec,3.5,, +macro,Py_mod_gil,3.13,, +macro,Py_mod_methods,3.15,, +macro,Py_mod_multiple_interpreters,3.12,, +macro,Py_mod_name,3.15,, +macro,Py_mod_state_clear,3.15,, +macro,Py_mod_state_free,3.15,, +macro,Py_mod_state_size,3.15,, +macro,Py_mod_state_traverse,3.15,, +macro,Py_mod_token,3.15,, +macro,Py_mp_ass_subscript,3.2,, +macro,Py_mp_length,3.2,, +macro,Py_mp_subscript,3.2,, +macro,Py_nb_absolute,3.2,, +macro,Py_nb_add,3.2,, +macro,Py_nb_and,3.2,, +macro,Py_nb_bool,3.2,, +macro,Py_nb_divmod,3.2,, +macro,Py_nb_float,3.2,, +macro,Py_nb_floor_divide,3.2,, +macro,Py_nb_index,3.2,, +macro,Py_nb_inplace_add,3.2,, +macro,Py_nb_inplace_and,3.2,, +macro,Py_nb_inplace_floor_divide,3.2,, +macro,Py_nb_inplace_lshift,3.2,, +macro,Py_nb_inplace_matrix_multiply,3.5,, +macro,Py_nb_inplace_multiply,3.2,, +macro,Py_nb_inplace_or,3.2,, +macro,Py_nb_inplace_power,3.2,, +macro,Py_nb_inplace_remainder,3.2,, +macro,Py_nb_inplace_rshift,3.2,, +macro,Py_nb_inplace_subtract,3.2,, +macro,Py_nb_inplace_true_divide,3.2,, +macro,Py_nb_inplace_xor,3.2,, +macro,Py_nb_int,3.2,, +macro,Py_nb_invert,3.2,, +macro,Py_nb_lshift,3.2,, +macro,Py_nb_matrix_multiply,3.5,, +macro,Py_nb_multiply,3.2,, +macro,Py_nb_negative,3.2,, +macro,Py_nb_or,3.2,, +macro,Py_nb_positive,3.2,, +macro,Py_nb_power,3.2,, +macro,Py_nb_remainder,3.2,, +macro,Py_nb_rshift,3.2,, +macro,Py_nb_subtract,3.2,, +macro,Py_nb_true_divide,3.2,, +macro,Py_nb_xor,3.2,, +macro,Py_sq_ass_item,3.2,, +macro,Py_sq_concat,3.2,, +macro,Py_sq_contains,3.2,, +macro,Py_sq_inplace_concat,3.2,, +macro,Py_sq_inplace_repeat,3.2,, +macro,Py_sq_item,3.2,, +macro,Py_sq_length,3.2,, +macro,Py_sq_repeat,3.2,, type,Py_ssize_t,3.2,, +macro,Py_tp_alloc,3.2,, +macro,Py_tp_base,3.2,, +macro,Py_tp_bases,3.2,, +macro,Py_tp_call,3.2,, +macro,Py_tp_clear,3.2,, +macro,Py_tp_dealloc,3.2,, +macro,Py_tp_del,3.2,, +macro,Py_tp_descr_get,3.2,, +macro,Py_tp_descr_set,3.2,, +macro,Py_tp_doc,3.2,, +macro,Py_tp_finalize,3.5,, +macro,Py_tp_free,3.2,, +macro,Py_tp_getattr,3.2,, +macro,Py_tp_getattro,3.2,, +macro,Py_tp_getset,3.2,, +macro,Py_tp_hash,3.2,, +macro,Py_tp_init,3.2,, +macro,Py_tp_is_gc,3.2,, +macro,Py_tp_iter,3.2,, +macro,Py_tp_iternext,3.2,, +macro,Py_tp_members,3.2,, +macro,Py_tp_methods,3.2,, +macro,Py_tp_new,3.2,, +macro,Py_tp_repr,3.2,, +macro,Py_tp_richcompare,3.2,, +macro,Py_tp_setattr,3.2,, +macro,Py_tp_setattro,3.2,, +macro,Py_tp_str,3.2,, +macro,Py_tp_token,3.14,, +macro,Py_tp_traverse,3.2,, +macro,Py_tp_vectorcall,3.14,, type,Py_uintptr_t,3.2,, type,allocfunc,3.2,, type,binaryfunc,3.2,, diff --git a/Doc/deprecations/c-api-pending-removal-in-3.20.rst b/Doc/deprecations/c-api-pending-removal-in-3.20.rst index 82f975d6ed4..18623b19a2a 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.20.rst +++ b/Doc/deprecations/c-api-pending-removal-in-3.20.rst @@ -5,3 +5,5 @@ Pending removal in Python 3.20 Use :c:func:`PyComplex_AsCComplex` and :c:func:`PyComplex_FromCComplex` to convert a Python complex number to/from the C :c:type:`Py_complex` representation. + +* Macros :c:macro:`!Py_MATH_PIl` and :c:macro:`!Py_MATH_El`. diff --git a/Doc/deprecations/pending-removal-in-3.13.rst b/Doc/deprecations/pending-removal-in-3.13.rst index 2fd2f12cc6a..d5b8c80e8f9 100644 --- a/Doc/deprecations/pending-removal-in-3.13.rst +++ b/Doc/deprecations/pending-removal-in-3.13.rst @@ -38,15 +38,3 @@ APIs: * :meth:`!unittest.TestProgram.usageExit` (:gh:`67048`) * :class:`!webbrowser.MacOSX` (:gh:`86421`) * :class:`classmethod` descriptor chaining (:gh:`89519`) -* :mod:`importlib.resources` deprecated methods: - - * ``contents()`` - * ``is_resource()`` - * ``open_binary()`` - * ``open_text()`` - * ``path()`` - * ``read_binary()`` - * ``read_text()`` - - Use :func:`importlib.resources.files` instead. Refer to `importlib-resources: Migrating from Legacy - `_ (:gh:`106531`) diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst index 0a1c2f08cab..e769c9d371e 100644 --- a/Doc/deprecations/pending-removal-in-3.17.rst +++ b/Doc/deprecations/pending-removal-in-3.17.rst @@ -23,6 +23,12 @@ Pending removal in Python 3.17 (Contributed by Shantanu Jain in :gh:`91896`.) +* :mod:`encodings`: + + - Passing non-ascii *encoding* names to :func:`encodings.normalize_encoding` + is deprecated and scheduled for removal in Python 3.17. + (Contributed by Stan Ulbrych in :gh:`136702`) + * :mod:`typing`: - Before Python 3.14, old-style unions were implemented using the private class diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 7ed430625f3..30186741670 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -76,7 +76,7 @@ although there is currently no date scheduled for their removal. * :mod:`mailbox`: Use of StringIO input and text mode is deprecated, use BytesIO and binary mode instead. -* :mod:`os`: Calling :func:`os.register_at_fork` in multi-threaded process. +* :mod:`os`: Calling :func:`os.register_at_fork` in a multi-threaded process. * :class:`!pydoc.ErrorDuringImport`: A tuple value for *exc_info* parameter is deprecated, use an exception instance. diff --git a/Doc/extending/extending.rst b/Doc/extending/extending.rst index dee92312169..f9b65643dfe 100644 --- a/Doc/extending/extending.rst +++ b/Doc/extending/extending.rst @@ -426,7 +426,7 @@ A pointer to the module definition must be returned via :c:func:`PyModuleDef_Ini so that the import machinery can create the module and store it in ``sys.modules``. When embedding Python, the :c:func:`!PyInit_spam` function is not called -automatically unless there's an entry in the :c:data:`!PyImport_Inittab` table. +automatically unless there's an entry in the :c:data:`PyImport_Inittab` table. To add the module to the initialization table, use :c:func:`PyImport_AppendInittab`, optionally followed by an import of the module:: diff --git a/Doc/glossary.rst b/Doc/glossary.rst index c0ca0be304e..a4066d42927 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -1025,6 +1025,15 @@ Glossary applied to all scopes, only those relying on a known set of local and nonlocal variable names are restricted to optimized scopes. + optional module + An :term:`extension module` that is part of the :term:`standard library`, + but may be absent in some builds of :term:`CPython`, + usually due to missing third-party libraries or because the module + is not available for a given platform. + + See :ref:`optional-module-requirements` for a list of optional modules + that require third-party libraries. + package A Python :term:`module` which can contain submodules or recursively, subpackages. Technically, a package is a Python module with a diff --git a/Doc/howto/a-conceptual-overview-of-asyncio.rst b/Doc/howto/a-conceptual-overview-of-asyncio.rst index af1e39480cc..3adfedbf410 100644 --- a/Doc/howto/a-conceptual-overview-of-asyncio.rst +++ b/Doc/howto/a-conceptual-overview-of-asyncio.rst @@ -1,7 +1,7 @@ .. _a-conceptual-overview-of-asyncio: **************************************** -A Conceptual Overview of :mod:`!asyncio` +A conceptual overview of :mod:`!asyncio` **************************************** This :ref:`HOWTO ` article seeks to help you build a sturdy mental @@ -37,15 +37,15 @@ In part 1, we'll cover the main, high-level building blocks of :mod:`!asyncio`: the event loop, coroutine functions, coroutine objects, tasks, and ``await``. ========== -Event Loop +Event loop ========== Everything in :mod:`!asyncio` happens relative to the event loop. -It's the star of the show. +It's the star of the show, but prefers to work behind the scenes, managing +and coordinating resources. It's like an orchestra conductor. -It's behind the scenes managing resources. Some power is explicitly granted to it, but a lot of its ability to get things -done comes from the respect and cooperation of its worker bees. +done comes from the respect and cooperation of its band members. In more technical terms, the event loop contains a collection of jobs to be run. Some jobs are added directly by you, and some indirectly by :mod:`!asyncio`. @@ -59,7 +59,7 @@ This process repeats indefinitely, with the event loop cycling endlessly onwards. If there are no more jobs pending execution, the event loop is smart enough to rest and avoid needlessly wasting CPU cycles, and will come back when there's -more work to be done. +more work to be done, such as when I/O operations complete or timers expire. Effective execution relies on jobs sharing well and cooperating; a greedy job could hog control and leave the other jobs to starve, rendering the overall @@ -170,14 +170,17 @@ Roughly speaking, :ref:`tasks ` are coroutines (not coroutine functions) tied to an event loop. A task also maintains a list of callback functions whose importance will become clear in a moment when we discuss :keyword:`await`. -The recommended way to create tasks is via :func:`asyncio.create_task`. Creating a task automatically schedules it for execution (by adding a callback to run it in the event loop's to-do list, that is, collection of jobs). +The recommended way to create tasks is via :func:`asyncio.create_task`. -Since there's only one event loop (in each thread), :mod:`!asyncio` takes care of -associating the task with the event loop for you. As such, there's no need -to specify the event loop. +:mod:`!asyncio` automatically associates tasks with the event loop for you. +This automatic association was purposely designed into :mod:`!asyncio` for +the sake of simplicity. +Without it, you'd have to keep track of the event loop object and pass it to +any coroutine function that wants to create tasks, adding redundant clutter +to your code. :: @@ -250,6 +253,10 @@ different ways:: In a crucial way, the behavior of ``await`` depends on the type of object being awaited. +^^^^^^^^^^^^^^ +Awaiting tasks +^^^^^^^^^^^^^^ + Awaiting a task will cede control from the current task or coroutine to the event loop. In the process of relinquishing control, a few important things happen. @@ -281,6 +288,10 @@ This is a basic, yet reliable mental model. In practice, the control handoffs are slightly more complex, but not by much. In part 2, we'll walk through the details that make this possible. +^^^^^^^^^^^^^^^^^^^ +Awaiting coroutines +^^^^^^^^^^^^^^^^^^^ + **Unlike tasks, awaiting a coroutine does not hand control back to the event loop!** Wrapping a coroutine in a task first, then awaiting that would cede @@ -347,8 +358,10 @@ The design intentionally trades off some conceptual clarity around usage of ``await`` for improved performance. Each time a task is awaited, control needs to be passed all the way up the call stack to the event loop. -That might sound minor, but in a large program with many ``await`` statements and a deep -call stack, that overhead can add up to a meaningful performance drag. +Then, the event loop needs to manage its internal state and work through +its processing logic to resume the next job. +That might sound minor, but in a large program with many ``await``\ s, that +overhead can add up to a non-negligible performance drag. ------------------------------------------------ A conceptual overview part 2: the nuts and bolts @@ -364,7 +377,8 @@ and how to make your own asynchronous operators. The inner workings of coroutines ================================ -:mod:`!asyncio` leverages four components to pass around control. +:mod:`!asyncio` leverages four components of Python to pass +around control. :meth:`coroutine.send(arg) ` is the method used to start or resume a coroutine. @@ -448,9 +462,9 @@ That might sound odd to you. You might be thinking: That causes the error: ``SyntaxError: yield from not allowed in a coroutine.`` This was intentionally designed for the sake of simplicity -- mandating only one way of using coroutines. + Despite that, ``yield from`` and ``await`` effectively do the same thing. Initially ``yield`` was barred as well, but was re-accepted to allow for async generators. - Despite that, ``yield from`` and ``await`` effectively do the same thing. ======= Futures diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 3776132c685..83eba8cfea3 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -45,9 +45,12 @@ single-phase initialization. Multi-Phase Initialization .......................... -Extensions that use multi-phase initialization (i.e., -:c:func:`PyModuleDef_Init`) should add a :c:data:`Py_mod_gil` slot in the -module definition. If your extension supports older versions of CPython, +Extensions that use :ref:`multi-phase initialization ` +(functions like :c:func:`PyModuleDef_Init`, +:c:func:`PyModExport_* ` export hook, +:c:func:`PyModule_FromSlotsAndSpec`) should add a +:c:data:`Py_mod_gil` slot in the module definition. +If your extension supports older versions of CPython, you should guard the slot with a :c:data:`PY_VERSION_HEX` check. :: @@ -60,18 +63,12 @@ you should guard the slot with a :c:data:`PY_VERSION_HEX` check. {0, NULL} }; - static struct PyModuleDef moduledef = { - PyModuleDef_HEAD_INIT, - .m_slots = module_slots, - ... - }; - Single-Phase Initialization ........................... -Extensions that use single-phase initialization (i.e., -:c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to +Extensions that use legacy :ref:`single-phase initialization ` +(that is, :c:func:`PyModule_Create`) should call :c:func:`PyUnstable_Module_SetGIL` to indicate that they support running with the GIL disabled. The function is only defined in the free-threaded build, so you should guard the call with ``#ifdef Py_GIL_DISABLED`` to avoid compilation errors in the regular build. @@ -203,7 +200,7 @@ Memory Allocation APIs Python's memory management C API provides functions in three different :ref:`allocation domains `: "raw", "mem", and "object". For thread-safety, the free-threaded build requires that only Python objects -are allocated using the object domain, and that all Python object are +are allocated using the object domain, and that all Python objects are allocated using that domain. This differs from the prior Python versions, where this was only a best practice and not a hard requirement. @@ -344,12 +341,12 @@ This means you cannot rely on nested critical sections to lock multiple objects at once, as the inner critical section may suspend the outer ones. Instead, use :c:macro:`Py_BEGIN_CRITICAL_SECTION2` to lock two objects simultaneously. -Note that the locks described above are only :c:type:`!PyMutex` based locks. +Note that the locks described above are only :c:type:`PyMutex` based locks. The critical section implementation does not know about or affect other locking mechanisms that might be in use, like POSIX mutexes. Also note that while -blocking on any :c:type:`!PyMutex` causes the critical sections to be +blocking on any :c:type:`PyMutex` causes the critical sections to be suspended, only the mutexes that are part of the critical sections are -released. If :c:type:`!PyMutex` is used without a critical section, it will +released. If :c:type:`PyMutex` is used without a critical section, it will not be released and therefore does not get the same deadlock avoidance. Important Considerations @@ -397,7 +394,8 @@ The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. * `pypa/manylinux `_ supports the free-threaded build, with the ``t`` suffix, such as ``python3.13t``. * `pypa/cibuildwheel `_ supports the - free-threaded build if you set + free-threaded build on Python 3.13 and 3.14. On Python 3.14, free-threaded + wheels will be built by default. On Python 3.13, you will need to set `CIBW_ENABLE to cpython-freethreading `_. Limited C API and Stable ABI diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index 24069617c47..380c2be0495 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -11,9 +11,7 @@ available processing power by running threads in parallel on available CPU cores While not all software will benefit from this automatically, programs designed with threading in mind will run faster on multi-core hardware. -The free-threaded mode is working and continues to be improved, but -there is some additional overhead in single-threaded workloads compared -to the regular build. Additionally, third-party packages, in particular ones +Some third-party packages, in particular ones with an :term:`extension module`, may not be ready for use in a free-threaded build, and will re-enable the :term:`GIL`. @@ -101,60 +99,42 @@ This section describes known limitations of the free-threaded CPython build. Immortalization --------------- -The free-threaded build of the 3.13 release makes some objects :term:`immortal`. +In the free-threaded build, some objects are :term:`immortal`. Immortal objects are not deallocated and have reference counts that are never modified. This is done to avoid reference count contention that would prevent efficient multi-threaded scaling. -An object will be made immortal when a new thread is started for the first time -after the main thread is running. The following objects are immortalized: +As of the 3.14 release, immortalization is limited to: -* :ref:`function ` objects declared at the module level -* :ref:`method ` descriptors -* :ref:`code ` objects -* :term:`module` objects and their dictionaries -* :ref:`classes ` (type objects) - -Because immortal objects are never deallocated, applications that create many -objects of these types may see increased memory usage. This is expected to be -addressed in the 3.14 release. - -Additionally, numeric and string literals in the code as well as strings -returned by :func:`sys.intern` are also immortalized. This behavior is -expected to remain in the 3.14 free-threaded build. +* Code constants: numeric literals, string literals, and tuple literals + composed of other constants. +* Strings interned by :func:`sys.intern`. Frame objects ------------- -It is not safe to access :ref:`frame ` objects from other -threads and doing so may cause your program to crash . This means that -:func:`sys._current_frames` is generally not safe to use in a free-threaded -build. Functions like :func:`inspect.currentframe` and :func:`sys._getframe` -are generally safe as long as the resulting frame object is not passed to -another thread. +It is not safe to access :attr:`frame.f_locals` from a :ref:`frame ` +object if that frame is currently executing in another thread, and doing so may +crash the interpreter. + Iterators --------- -Sharing the same iterator object between multiple threads is generally not -safe and threads may see duplicate or missing elements when iterating or crash -the interpreter. +It is generally not thread-safe to access the same iterator object from +multiple threads concurrently, and threads may see duplicate or missing +elements. Single-threaded performance --------------------------- The free-threaded build has additional overhead when executing Python code -compared to the default GIL-enabled build. In 3.13, this overhead is about -40% on the `pyperformance `_ suite. -Programs that spend most of their time in C extensions or I/O will see -less of an impact. The largest impact is because the specializing adaptive -interpreter (:pep:`659`) is disabled in the free-threaded build. We expect -to re-enable it in a thread-safe way in the 3.14 release. This overhead is -expected to be reduced in upcoming Python release. We are aiming for an -overhead of 10% or less on the pyperformance suite compared to the default -GIL-enabled build. +compared to the default GIL-enabled build. The amount of overhead depends +on the workload and hardware. On the pyperformance benchmark suite, the +average overhead ranges from about 1% on macOS aarch64 to 8% on x86-64 Linux +systems. Behavioral changes diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 053558e3890..552514063c9 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -4,7 +4,7 @@ Functional Programming HOWTO ******************************** -:Author: A. M. Kuchling +:Author: \A. M. Kuchling :Release: 0.32 In this document, we'll take a tour of Python's features suitable for diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index 7da6dc8a397..6092c75f48f 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -353,7 +353,7 @@ garbage collection protocol. That is, heap types should: - Have the :c:macro:`Py_TPFLAGS_HAVE_GC` flag. -- Define a traverse function using ``Py_tp_traverse``, which +- Define a traverse function using :c:data:`Py_tp_traverse`, which visits the type (e.g. using ``Py_VISIT(Py_TYPE(self))``). Please refer to the documentation of diff --git a/Doc/howto/unicode.rst b/Doc/howto/unicode.rst index 254fe729355..243cc27bac7 100644 --- a/Doc/howto/unicode.rst +++ b/Doc/howto/unicode.rst @@ -352,6 +352,8 @@ If you don't include such a comment, the default encoding used will be UTF-8 as already mentioned. See also :pep:`263` for more information. +.. _unicode-properties: + Unicode Properties ------------------ diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index d79d1abe8d0..4e77d2cb407 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -15,7 +15,7 @@ Introduction You may also find useful the following article on fetching web resources with Python: - * `Basic Authentication `_ + * `Basic Authentication `__ A tutorial on *Basic Authentication*, with examples in Python. diff --git a/Doc/includes/optional-module.rst b/Doc/includes/optional-module.rst new file mode 100644 index 00000000000..262e73f2eaa --- /dev/null +++ b/Doc/includes/optional-module.rst @@ -0,0 +1,9 @@ +This is an :term:`optional module`. +If it is missing from your copy of CPython, +look for documentation from your distributor (that is, +whoever provided Python to you). +If you are the distributor, see :ref:`optional-module-requirements`. + +.. Similar notes appear in the docs of the modules: + - zipfile + - tarfile diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 9655db4f301..71c4f094886 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -767,9 +767,9 @@ how the command-line arguments should be handled. The supplied actions are: Namespace(foo=42) * ``'store_true'`` and ``'store_false'`` - These are special cases of - ``'store_const'`` used for storing the values ``True`` and ``False`` - respectively. In addition, they create default values of ``False`` and - ``True`` respectively:: + ``'store_const'`` that respectively store the values ``True`` and ``False`` + with default values of ``False`` and + ``True``:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', action='store_true') @@ -789,8 +789,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--foo 1 --foo 2'.split()) Namespace(foo=['0', '1', '2']) -* ``'append_const'`` - This stores a list, and appends the value specified by - the const_ keyword argument to the list; note that the const_ keyword +* ``'append_const'`` - This appends the value specified by + the const_ keyword argument to a list; note that the const_ keyword argument defaults to ``None``. The ``'append_const'`` action is typically useful when multiple arguments need to store constants to the same list. For example:: @@ -801,8 +801,8 @@ how the command-line arguments should be handled. The supplied actions are: >>> parser.parse_args('--str --int'.split()) Namespace(types=[, ]) -* ``'extend'`` - This stores a list and appends each item from the multi-value - argument list to it. +* ``'extend'`` - This appends each item from a multi-value + argument to a list. The ``'extend'`` action is typically used with the nargs_ keyword argument value ``'+'`` or ``'*'``. Note that when nargs_ is ``None`` (the default) or ``'?'``, each @@ -816,7 +816,7 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 -* ``'count'`` - This counts the number of times a keyword argument occurs. For +* ``'count'`` - This counts the number of times an argument occurs. For example, this is useful for increasing verbosity levels:: >>> parser = argparse.ArgumentParser() @@ -1322,8 +1322,12 @@ attribute is determined by the ``dest`` keyword argument of For optional argument actions, the value of ``dest`` is normally inferred from the option strings. :class:`ArgumentParser` generates the value of ``dest`` by -taking the first long option string and stripping away the initial ``--`` -string. If no long option strings were supplied, ``dest`` will be derived from +taking the first double-dash long option string and stripping away the initial +``-`` characters. +If no double-dash long option strings were supplied, ``dest`` will be derived +from the first single-dash long option string by stripping the initial ``-`` +character. +If no long option strings were supplied, ``dest`` will be derived from the first short option string by stripping the initial ``-`` character. Any internal ``-`` characters will be converted to ``_`` characters to make sure the string is a valid attribute name. The examples below illustrate this @@ -1331,11 +1335,12 @@ behavior:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('-f', '--foo-bar', '--foo') + >>> parser.add_argument('-q', '-quz') >>> parser.add_argument('-x', '-y') - >>> parser.parse_args('-f 1 -x 2'.split()) - Namespace(foo_bar='1', x='2') - >>> parser.parse_args('--foo 1 -y 2'.split()) - Namespace(foo_bar='1', x='2') + >>> parser.parse_args('-f 1 -q 2 -x 3'.split()) + Namespace(foo_bar='1', quz='2', x='3') + >>> parser.parse_args('--foo 1 -quz 2 -y 3'.split()) + Namespace(foo_bar='1', quz='2', x='2') ``dest`` allows a custom attribute name to be provided:: @@ -1344,6 +1349,9 @@ behavior:: >>> parser.parse_args('--foo XXX'.split()) Namespace(bar='XXX') +.. versionchanged:: next + Single-dash long option now takes precedence over short options. + .. _deprecated: @@ -1437,8 +1445,18 @@ this API may be passed as the ``action`` parameter to >>> parser.parse_args(['--no-foo']) Namespace(foo=False) + Single-dash long options are also supported. + For example, negative option ``-nofoo`` is automatically added for + positive option ``-foo``. + But no additional options are added for short options such as ``-f``. + .. versionadded:: 3.9 + .. versionchanged:: next + Added support for single-dash options. + + Added support for alternate prefix_chars_. + The parse_args() method ----------------------- @@ -2070,7 +2088,9 @@ Parser defaults >>> parser.parse_args(['736']) Namespace(bar=42, baz='badger', foo=736) - Note that parser-level defaults always override argument-level defaults:: + Note that defaults can be set at both the parser level using :meth:`set_defaults` + and at the argument level using :meth:`add_argument`. If both are called for the + same argument, the last default set for an argument is used:: >>> parser = argparse.ArgumentParser() >>> parser.add_argument('--foo', default='bar') diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index 49462167217..2e7d0dbc26e 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -2205,10 +2205,10 @@ Async and await Apart from the node classes, the :mod:`ast` module defines these utility functions and classes for traversing abstract syntax trees: -.. function:: parse(source, filename='', mode='exec', *, type_comments=False, feature_version=None, optimize=-1) +.. function:: parse(source, filename='', mode='exec', *, type_comments=False, feature_version=None, optimize=-1, module=None) Parse the source into an AST node. Equivalent to ``compile(source, - filename, mode, flags=FLAGS_VALUE, optimize=optimize)``, + filename, mode, flags=FLAGS_VALUE, optimize=optimize, module=module)``, where ``FLAGS_VALUE`` is ``ast.PyCF_ONLY_AST`` if ``optimize <= 0`` and ``ast.PyCF_OPTIMIZED_AST`` otherwise. @@ -2261,6 +2261,9 @@ and classes for traversing abstract syntax trees: The minimum supported version for ``feature_version`` is now ``(3, 7)``. The ``optimize`` argument was added. + .. versionadded:: 3.15 + Added the *module* parameter. + .. function:: unparse(ast_obj) diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index 0ccc7a2b448..72f484fd1cb 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -1631,6 +1631,9 @@ async/await code consider using the high-level conforms to the :class:`asyncio.SubprocessTransport` base class and *protocol* is an object instantiated by the *protocol_factory*. + If the transport is closed or is garbage collected, the child process + is killed if it is still running. + .. method:: loop.subprocess_shell(protocol_factory, cmd, *, \ stdin=subprocess.PIPE, stdout=subprocess.PIPE, \ stderr=subprocess.PIPE, **kwargs) @@ -1654,6 +1657,9 @@ async/await code consider using the high-level conforms to the :class:`SubprocessTransport` base class and *protocol* is an object instantiated by the *protocol_factory*. + If the transport is closed or is garbage collected, the child process + is killed if it is still running. + .. note:: It is the application's responsibility to ensure that all whitespace and special characters are quoted appropriately to avoid `shell injection diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst index 03e76bc8689..9416c758e51 100644 --- a/Doc/library/asyncio-subprocess.rst +++ b/Doc/library/asyncio-subprocess.rst @@ -76,6 +76,9 @@ Creating Subprocesses See the documentation of :meth:`loop.subprocess_exec` for other parameters. + If the process object is garbage collected while the process is still + running, the child process will be killed. + .. versionchanged:: 3.10 Removed the *loop* parameter. @@ -95,6 +98,9 @@ Creating Subprocesses See the documentation of :meth:`loop.subprocess_shell` for other parameters. + If the process object is garbage collected while the process is still + running, the child process will be killed. + .. important:: It is the application's responsibility to ensure that all whitespace and diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index f825ae92ec7..863b3e33657 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -1221,8 +1221,8 @@ Task Object To cancel a running Task use the :meth:`cancel` method. Calling it will cause the Task to throw a :exc:`CancelledError` exception into - the wrapped coroutine. If a coroutine is awaiting on a Future - object during cancellation, the Future object will be cancelled. + the wrapped coroutine. If a coroutine is awaiting on a future-like + object during cancellation, the awaited object will be cancelled. :meth:`cancelled` can be used to check if the Task was cancelled. The method returns ``True`` if the wrapped coroutine did not @@ -1411,6 +1411,10 @@ Task Object the cancellation, it needs to call :meth:`Task.uncancel` in addition to catching the exception. + If the Task being cancelled is currently awaiting on a future-like + object, that awaited object will also be cancelled. This cancellation + propagates down the entire chain of awaited objects. + .. versionchanged:: 3.9 Added the *msg* parameter. diff --git a/Doc/library/asyncio.rst b/Doc/library/asyncio.rst index 444db01390d..0f72e31dee5 100644 --- a/Doc/library/asyncio.rst +++ b/Doc/library/asyncio.rst @@ -79,6 +79,10 @@ You can experiment with an ``asyncio`` concurrent context in the :term:`REPL`: >>> await asyncio.sleep(10, result='hello') 'hello' +This REPL provides limited compatibility with :envvar:`PYTHON_BASIC_REPL`. +It is recommended that the default REPL is used +for full functionality and the latest features. + .. audit-event:: cpython.run_stdin "" "" .. versionchanged:: 3.12.5 (also 3.11.10, 3.10.15, 3.9.20, and 3.8.20) diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index c7a3e0c596b..a3c6da7a6d6 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -236,7 +236,7 @@ The :mod:`bdb` module also defines two classes: Normally derived classes don't override the following methods, but they may if they want to redefine the definition of stopping and breakpoints. - .. method:: is_skipped_line(module_name) + .. method:: is_skipped_module(module_name) Return ``True`` if *module_name* matches any skip pattern. diff --git a/Doc/library/bz2.rst b/Doc/library/bz2.rst index ebe2e43feba..12650861c0f 100644 --- a/Doc/library/bz2.rst +++ b/Doc/library/bz2.rst @@ -25,6 +25,8 @@ The :mod:`bz2` module contains: * The :func:`compress` and :func:`decompress` functions for one-shot (de)compression. +.. include:: ../includes/optional-module.rst + (De)compression of files ------------------------ diff --git a/Doc/library/cmdline.rst b/Doc/library/cmdline.rst index 16c67ddbf7c..c43b10157f9 100644 --- a/Doc/library/cmdline.rst +++ b/Doc/library/cmdline.rst @@ -16,17 +16,17 @@ The following modules have a command-line interface. * :ref:`dis ` * :ref:`doctest ` * :mod:`!encodings.rot_13` -* :mod:`ensurepip` +* :ref:`ensurepip ` * :mod:`filecmp` * :mod:`fileinput` * :mod:`ftplib` * :ref:`gzip ` * :ref:`http.server ` -* :mod:`!idlelib` +* :ref:`idlelib ` * :ref:`inspect ` * :ref:`json ` * :ref:`mimetypes ` -* :mod:`pdb` +* :ref:`pdb ` * :ref:`pickle ` * :ref:`pickletools ` * :ref:`platform ` @@ -52,8 +52,8 @@ The following modules have a command-line interface. * :mod:`turtledemo` * :ref:`unittest ` * :ref:`uuid ` -* :mod:`venv` -* :mod:`webbrowser` +* :ref:`venv ` +* :ref:`webbrowser ` * :ref:`zipapp ` * :ref:`zipfile ` diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 9a8108d882e..4e0db485e06 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -1209,7 +1209,7 @@ If a new entry overwrites an existing entry, the original insertion position is changed and moved to the end:: class LastUpdatedOrderedDict(OrderedDict): - 'Store items in the order the keys were last added' + 'Store items in the order that the keys were last updated.' def __setitem__(self, key, value): super().__setitem__(key, value) diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst index a901403621b..89b6fe540f5 100644 --- a/Doc/library/compression.zstd.rst +++ b/Doc/library/compression.zstd.rst @@ -33,6 +33,8 @@ The :mod:`!compression.zstd` module contains: * The :class:`CompressionParameter`, :class:`DecompressionParameter`, and :class:`Strategy` classes for setting advanced (de)compression parameters. +.. include:: ../includes/optional-module.rst + Exceptions ---------- diff --git a/Doc/library/concurrent.interpreters.rst b/Doc/library/concurrent.interpreters.rst index 41ea6af3b22..55036090e8d 100644 --- a/Doc/library/concurrent.interpreters.rst +++ b/Doc/library/concurrent.interpreters.rst @@ -29,12 +29,12 @@ Actual concurrency is available separately through .. seealso:: :class:`~concurrent.futures.InterpreterPoolExecutor` - combines threads with interpreters in a familiar interface. + Combines threads with interpreters in a familiar interface. - .. XXX Add references to the upcoming HOWTO docs in the seealso block. + .. XXX Add references to the upcoming HOWTO docs in the seealso block. :ref:`isolating-extensions-howto` - how to update an extension module to support multiple interpreters + How to update an extension module to support multiple interpreters. :pep:`554` diff --git a/Doc/library/contextvars.rst b/Doc/library/contextvars.rst index 57580ce026e..b218468a084 100644 --- a/Doc/library/contextvars.rst +++ b/Doc/library/contextvars.rst @@ -313,7 +313,7 @@ client:: addr = writer.transport.get_extra_info('socket').getpeername() client_addr_var.set(addr) - # In any code that we call is now possible to get + # In any code that we call, it is now possible to get the # client's address by calling 'client_addr_var.get()'. while True: diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 3ea7cd210f7..4a033d823e6 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -295,8 +295,8 @@ The :mod:`csv` module defines the following classes: - the second through n-th rows contain strings where at least one value's length differs from that of the putative header of that column. - Twenty rows after the first row are sampled; if more than half of columns + - rows meet the criteria, :const:`True` is returned. + Twenty-one rows after the header are sampled; if more than half of the + columns + rows meet the criteria, :const:`True` is returned. .. note:: diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index d8dac24c8ab..9c0b246c095 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -14,6 +14,8 @@ data types, and allows calling functions in DLLs or shared libraries. It can be used to wrap these libraries in pure Python. +.. include:: ../includes/optional-module.rst + .. _ctypes-ctypes-tutorial: diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index fb84cf32246..057d338edda 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -23,6 +23,8 @@ Linux and the BSD variants of Unix. .. include:: ../includes/wasm-mobile-notavail.rst +.. include:: ../includes/optional-module.rst + .. note:: Whenever the documentation mentions a *character* it can be specified @@ -1349,7 +1351,6 @@ The :mod:`curses` module defines the following data members: .. data:: version -.. data:: __version__ A bytes object representing the current version of the module. diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 985153b5443..621aa23ecc8 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -264,7 +264,7 @@ allows the settings to be changed. This approach meets the needs of most applications. For more advanced work, it may be useful to create alternate contexts using the -Context() constructor. To make an alternate active, use the :func:`setcontext` +:meth:`Context` constructor. To make an alternate active, use the :func:`setcontext` function. In accordance with the standard, the :mod:`decimal` module provides two ready to @@ -1575,7 +1575,7 @@ Constants Specification that this implementation complies with. See https://speleotrove.com/decimal/decarith.html for the specification. - .. versionadded:: next + .. versionadded:: 3.15 The following constants are only relevant for the C module. They @@ -2109,20 +2109,20 @@ to work with the :class:`Decimal` class:: Decimal FAQ ----------- -Q. It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to +Q: It is cumbersome to type ``decimal.Decimal('1234.5')``. Is there a way to minimize typing when using the interactive interpreter? -A. Some users abbreviate the constructor to just a single letter: +A: Some users abbreviate the constructor to just a single letter: >>> D = decimal.Decimal >>> D('1.23') + D('3.45') Decimal('4.68') -Q. In a fixed-point application with two decimal places, some inputs have many +Q: In a fixed-point application with two decimal places, some inputs have many places and need to be rounded. Others are not supposed to have excess digits and need to be validated. What methods should be used? -A. The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. If +A: The :meth:`~Decimal.quantize` method rounds to a fixed number of decimal places. If the :const:`Inexact` trap is set, it is also useful for validation: >>> TWOPLACES = Decimal(10) ** -2 # same as Decimal('0.01') @@ -2140,10 +2140,10 @@ the :const:`Inexact` trap is set, it is also useful for validation: ... Inexact: None -Q. Once I have valid two place inputs, how do I maintain that invariant +Q: Once I have valid two place inputs, how do I maintain that invariant throughout an application? -A. Some operations like addition, subtraction, and multiplication by an integer +A: Some operations like addition, subtraction, and multiplication by an integer will automatically preserve fixed point. Others operations, like division and non-integer multiplication, will change the number of decimal places and need to be followed-up with a :meth:`~Decimal.quantize` step: @@ -2175,21 +2175,21 @@ to handle the :meth:`~Decimal.quantize` step: >>> div(b, a) Decimal('0.03') -Q. There are many ways to express the same value. The numbers ``200``, +Q: There are many ways to express the same value. The numbers ``200``, ``200.000``, ``2E2``, and ``.02E+4`` all have the same value at various precisions. Is there a way to transform them to a single recognizable canonical value? -A. The :meth:`~Decimal.normalize` method maps all equivalent values to a single +A: The :meth:`~Decimal.normalize` method maps all equivalent values to a single representative: >>> values = map(Decimal, '200 200.000 2E2 .02E+4'.split()) >>> [v.normalize() for v in values] [Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2'), Decimal('2E+2')] -Q. When does rounding occur in a computation? +Q: When does rounding occur in a computation? -A. It occurs *after* the computation. The philosophy of the decimal +A: It occurs *after* the computation. The philosophy of the decimal specification is that numbers are considered exact and are created independent of the current context. They can even have greater precision than current context. Computations process with those @@ -2207,10 +2207,10 @@ applied to the *result* of the computation:: >>> pi + 0 - Decimal('0.00005'). # Intermediate values are rounded Decimal('3.1416') -Q. Some decimal values always print with exponential notation. Is there a way +Q: Some decimal values always print with exponential notation. Is there a way to get a non-exponential representation? -A. For some values, exponential notation is the only way to express the number +A: For some values, exponential notation is the only way to express the number of significant places in the coefficient. For example, expressing ``5.0E+3`` as ``5000`` keeps the value constant but cannot show the original's two-place significance. @@ -2225,9 +2225,9 @@ value unchanged: >>> remove_exponent(Decimal('5E+3')) Decimal('5000') -Q. Is there a way to convert a regular float to a :class:`Decimal`? +Q: Is there a way to convert a regular float to a :class:`Decimal`? -A. Yes, any binary floating-point number can be exactly expressed as a +A: Yes, any binary floating-point number can be exactly expressed as a Decimal though an exact conversion may take more precision than intuition would suggest: @@ -2236,19 +2236,19 @@ suggest: >>> Decimal(math.pi) Decimal('3.141592653589793115997963468544185161590576171875') -Q. Within a complex calculation, how can I make sure that I haven't gotten a +Q: Within a complex calculation, how can I make sure that I haven't gotten a spurious result because of insufficient precision or rounding anomalies. -A. The decimal module makes it easy to test results. A best practice is to +A: The decimal module makes it easy to test results. A best practice is to re-run calculations using greater precision and with various rounding modes. Widely differing results indicate insufficient precision, rounding mode issues, ill-conditioned inputs, or a numerically unstable algorithm. -Q. I noticed that context precision is applied to the results of operations but +Q: I noticed that context precision is applied to the results of operations but not to the inputs. Is there anything to watch out for when mixing values of different precisions? -A. Yes. The principle is that all values are considered to be exact and so is +A: Yes. The principle is that all values are considered to be exact and so is the arithmetic on those values. Only the results are rounded. The advantage for inputs is that "what you type is what you get". A disadvantage is that the results can look odd if you forget that the inputs haven't been rounded: @@ -2276,9 +2276,9 @@ Alternatively, inputs can be rounded upon creation using the >>> Context(prec=5, rounding=ROUND_DOWN).create_decimal('1.2345678') Decimal('1.2345') -Q. Is the CPython implementation fast for large numbers? +Q: Is the CPython implementation fast for large numbers? -A. Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of +A: Yes. In the CPython and PyPy3 implementations, the C/CFFI versions of the decimal module integrate the high speed `libmpdec `_ library for arbitrary precision correctly rounded decimal floating-point arithmetic [#]_. diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 284eeff5e4d..a24589fd0a5 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1673,9 +1673,13 @@ iterations of the loop. * ``0x02`` a dictionary of keyword-only parameters' default values * ``0x04`` a tuple of strings containing parameters' annotations * ``0x08`` a tuple containing cells for free variables, making a closure + * ``0x10`` the :term:`annotate function` for the function object .. versionadded:: 3.13 + .. versionchanged:: 3.14 + Added ``0x10`` to indicate the annotate function for the function object. + .. opcode:: BUILD_SLICE (argc) diff --git a/Doc/library/email.headerregistry.rst b/Doc/library/email.headerregistry.rst index 7f8044932fa..ff8b601fe3d 100644 --- a/Doc/library/email.headerregistry.rst +++ b/Doc/library/email.headerregistry.rst @@ -294,7 +294,7 @@ variant, :attr:`~.BaseHeader.max_count` is set to 1. ``inline`` and ``attachment`` are the only valid values in common use. -.. class:: ContentTransferEncoding +.. class:: ContentTransferEncodingHeader Handles the :mailheader:`Content-Transfer-Encoding` header. diff --git a/Doc/library/ensurepip.rst b/Doc/library/ensurepip.rst index fa102c4a080..32b92c01570 100644 --- a/Doc/library/ensurepip.rst +++ b/Doc/library/ensurepip.rst @@ -30,6 +30,8 @@ when creating a virtual environment) or after explicitly uninstalling needed to bootstrap ``pip`` are included as internal parts of the package. +.. include:: ../includes/optional-module.rst + .. seealso:: :ref:`installing-index` @@ -40,7 +42,9 @@ when creating a virtual environment) or after explicitly uninstalling .. include:: ../includes/wasm-mobile-notavail.rst -Command line interface +.. _ensurepip-cli: + +Command-line interface ---------------------- .. program:: ensurepip diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 61799e303a1..8314fed80fa 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -292,7 +292,9 @@ are always available. They are listed here in alphabetical order. :func:`property`. -.. function:: compile(source, filename, mode, flags=0, dont_inherit=False, optimize=-1) +.. function:: compile(source, filename, mode, flags=0, \ + dont_inherit=False, optimize=-1, \ + *, module=None) Compile the *source* into a code or AST object. Code objects can be executed by :func:`exec` or :func:`eval`. *source* can either be a normal string, a @@ -334,6 +336,10 @@ are always available. They are listed here in alphabetical order. ``__debug__`` is true), ``1`` (asserts are removed, ``__debug__`` is false) or ``2`` (docstrings are removed too). + The optional argument *module* specifies the module name. + It is needed to unambiguous :ref:`filter ` syntax warnings + by module name. + This function raises :exc:`SyntaxError` if the compiled source is invalid, and :exc:`ValueError` if the source contains null bytes. @@ -371,6 +377,9 @@ are always available. They are listed here in alphabetical order. ``ast.PyCF_ALLOW_TOP_LEVEL_AWAIT`` can now be passed in flags to enable support for top-level ``await``, ``async for``, and ``async with``. + .. versionadded:: 3.15 + Added the *module* parameter. + .. class:: complex(number=0, /) complex(string, /) @@ -1859,7 +1868,7 @@ are always available. They are listed here in alphabetical order. the same data with other ordering tools such as :func:`max` that rely on a different underlying method. Implementing all six comparisons also helps avoid confusion for mixed type comparisons which can call - reflected the :meth:`~object.__gt__` method. + the reflected :meth:`~object.__gt__` method. For sorting examples and a brief sorting tutorial, see :ref:`sortinghowto`. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 37d9f87e779..221c0712c7c 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -42,11 +42,11 @@ The :mod:`functools` module defines the following functions: def factorial(n): return n * factorial(n-1) if n else 1 - >>> factorial(10) # no previously cached result, makes 11 recursive calls + >>> factorial(10) # no previously cached result, makes 11 recursive calls 3628800 - >>> factorial(5) # just looks up cached value result + >>> factorial(5) # no new calls, just returns the cached result 120 - >>> factorial(12) # makes two new recursive calls, the other 10 are cached + >>> factorial(12) # two new recursive calls, factorial(10) is cached 479001600 The cache is threadsafe so that the wrapped function can be used in @@ -57,6 +57,10 @@ The :mod:`functools` module defines the following functions: another thread makes an additional call before the initial call has been completed and cached. + Call-once behavior is not guaranteed because locks are not held during the + function call. Potentially another call with the same arguments could + occur while the first call is still running. + .. versionadded:: 3.9 @@ -672,7 +676,7 @@ The :mod:`functools` module defines the following functions: dispatch>` :term:`generic function`. To define a generic method, decorate it with the ``@singledispatchmethod`` - decorator. When defining a function using ``@singledispatchmethod``, note + decorator. When defining a method using ``@singledispatchmethod``, note that the dispatch happens on the type of the first non-*self* or non-*cls* argument:: @@ -716,6 +720,9 @@ The :mod:`functools` module defines the following functions: .. versionadded:: 3.8 + .. versionchanged:: 3.15 + Added support of non-:term:`descriptor` callables. + .. function:: update_wrapper(wrapper, wrapped, assigned=WRAPPER_ASSIGNMENTS, updated=WRAPPER_UPDATES) diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 2ef5c4b35a2..79a8c38626f 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -108,10 +108,19 @@ The :mod:`gc` module provides the following functions: * ``uncollectable`` is the total number of objects which were found to be uncollectable (and were therefore moved to the :data:`garbage` - list) inside this generation. + list) inside this generation; + + * ``candidates`` is the total number of objects in this generation which were + considered for collection and traversed; + + * ``duration`` is the total time in seconds spent in collections for this + generation. .. versionadded:: 3.4 + .. versionchanged:: next + Add ``duration`` and ``candidates``. + .. function:: set_threshold(threshold0, [threshold1, [threshold2]]) @@ -313,6 +322,12 @@ values but should not rebind them): "uncollectable": When *phase* is "stop", the number of objects that could not be collected and were put in :data:`garbage`. + "candidates": When *phase* is "stop", the total number of objects in this + generation which were considered for collection and traversed. + + "duration": When *phase* is "stop", the time in seconds spent in the + collection. + Applications can add their own callbacks to this list. The primary use cases are: @@ -325,6 +340,9 @@ values but should not rebind them): .. versionadded:: 3.3 + .. versionchanged:: next + Add "duration" and "candidates". + The following constants are provided for use with :func:`set_debug`: diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 4bdcec66088..d23c0741ddb 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -11,6 +11,8 @@ This module provides a simple interface to compress and decompress files just like the GNU programs :program:`gzip` and :program:`gunzip` would. +.. include:: ../includes/optional-module.rst + The data compression is provided by the :mod:`zlib` module. The :mod:`gzip` module provides the :class:`GzipFile` class, as well as the @@ -281,7 +283,7 @@ Example of how to GZIP compress a binary string:: .. _gzip-cli: -Command Line Interface +Command-line interface ---------------------- The :mod:`gzip` module provides a simple command line interface to compress or @@ -294,7 +296,7 @@ Once executed the :mod:`gzip` module keeps the input file(s). Add a new command line interface with a usage. By default, when you will execute the CLI, the default compression level is 6. -Command line options +Command-line options ^^^^^^^^^^^^^^^^^^^^ .. option:: file diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index 95ef72469b1..5049262306a 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -58,6 +58,11 @@ functions, respectively. The following functions are provided for min-heaps: +.. function:: heapify(x) + + Transform list *x* into a min-heap, in-place, in linear time. + + .. function:: heappush(heap, item) Push the value *item* onto the *heap*, maintaining the min-heap invariant. @@ -77,11 +82,6 @@ The following functions are provided for min-heaps: followed by a separate call to :func:`heappop`. -.. function:: heapify(x) - - Transform list *x* into a min-heap, in-place, in linear time. - - .. function:: heapreplace(heap, item) Pop and return the smallest item from the *heap*, and also push the new *item*. diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index 589152f2968..7c258b324d9 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -133,7 +133,7 @@ This module provides the following function: Parse the headers from a file pointer *fp* representing a HTTP request/response. The file has to be a :class:`~io.BufferedIOBase` reader - (i.e. not text) and must provide a valid :rfc:`2822` style header. + (i.e. not text) and must provide a valid :rfc:`5322` style header. This function returns an instance of :class:`http.client.HTTPMessage` that holds the header fields, but no payload diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst index 251aea891c3..fcb0069b760 100644 --- a/Doc/library/http.cookiejar.rst +++ b/Doc/library/http.cookiejar.rst @@ -12,7 +12,7 @@ -------------- The :mod:`http.cookiejar` module defines classes for automatic handling of HTTP -cookies. It is useful for accessing web sites that require small pieces of data +cookies. It is useful for accessing websites that require small pieces of data -- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a web server, and then returned to the server in later HTTP requests. diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 063344e0284..58f09634f95 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -154,7 +154,7 @@ instantiation, of which this module provides three different variants: variable. This instance parses and manages the headers in the HTTP request. The :func:`~http.client.parse_headers` function from :mod:`http.client` is used to parse the headers and it requires that the - HTTP request provide a valid :rfc:`2822` style header. + HTTP request provide a valid :rfc:`5322` style header. .. attribute:: rfile diff --git a/Doc/library/idle.rst b/Doc/library/idle.rst index e547c96b580..a16f46ef812 100644 --- a/Doc/library/idle.rst +++ b/Doc/library/idle.rst @@ -13,7 +13,7 @@ IDLE --- Python editor and shell single: Integrated Development Environment .. - Remember to update Lib/idlelib/help.html with idlelib.help.copy_source() when modifying this file. + Remember to update Lib/idlelib/help.html with idlelib.help.copy_strip() when modifying this file. -------------- @@ -37,6 +37,10 @@ IDLE has the following features: * configuration, browsers, and other dialogs +The IDLE application is implemented in the :mod:`idlelib` package. + +.. include:: ../includes/optional-module.rst + Menus ----- @@ -88,7 +92,7 @@ Save Save As... Save the current window with a Save As dialog. The file saved becomes the - new associated file for the window. (If your file namager is set to hide + new associated file for the window. (If your file manager is set to hide extensions, the current extension will be omitted in the file name box. If the new filename has no '.', '.py' and '.txt' will be added for Python and text files, except that on macOS Aqua,'.py' is added for all files.) @@ -206,7 +210,7 @@ New Indent Width Strip Trailing Whitespace Remove trailing space and other whitespace characters after the last - non-whitespace character of a line by applying str.rstrip to each line, + non-whitespace character of a line by applying :meth:`str.rstrip` to each line, including lines within multiline strings. Except for Shell windows, remove extra newlines at the end of the file. @@ -657,7 +661,9 @@ looked for in the user's home directory. Statements in this file will be executed in the Tk namespace, so this file is not useful for importing functions to be used from IDLE's Python shell. -Command line usage +.. _idlelib-cli: + +Command-line usage ^^^^^^^^^^^^^^^^^^ .. program:: idle diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index 602a7100a12..3f0a54ac535 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -459,7 +459,7 @@ ABC hierarchy:: .. versionchanged:: 3.4 Raises :exc:`ImportError` instead of :exc:`NotImplementedError`. - .. staticmethod:: source_to_code(data, path='') + .. staticmethod:: source_to_code(data, path='', fullname=None) Create a code object from Python source. @@ -471,11 +471,19 @@ ABC hierarchy:: With the subsequent code object one can execute it in a module by running ``exec(code, module.__dict__)``. + The optional argument *fullname* specifies the module name. + It is needed to unambiguous :ref:`filter ` syntax + warnings by module name. + .. versionadded:: 3.4 .. versionchanged:: 3.5 Made the method static. + .. versionadded:: 3.15 + Added the *fullname* parameter. + + .. method:: exec_module(module) Implementation of :meth:`Loader.exec_module`. @@ -1040,7 +1048,7 @@ find and load modules. :meth:`PathFinder.invalidate_caches` invalidates :class:`NamespacePath`, forcing the path value to be recomputed next time it is accessed. - .. versionadded:: next + .. versionadded:: 3.15 .. class:: SourceFileLoader(fullname, path) diff --git a/Doc/library/inspect.rst b/Doc/library/inspect.rst index 2b3b294ff33..5220c559d3d 100644 --- a/Doc/library/inspect.rst +++ b/Doc/library/inspect.rst @@ -619,17 +619,29 @@ attributes (see :ref:`import-mod-attrs` for module attributes): Retrieving source code ---------------------- -.. function:: getdoc(object) +.. function:: getdoc(object, *, inherit_class_doc=True, fallback_to_class_doc=True) Get the documentation string for an object, cleaned up with :func:`cleandoc`. - If the documentation string for an object is not provided and the object is - a class, a method, a property or a descriptor, retrieve the documentation - string from the inheritance hierarchy. + If the documentation string for an object is not provided: + + * if the object is a class and *inherit_class_doc* is true (by default), + retrieve the documentation string from the inheritance hierarchy; + * if the object is a method, a property or a descriptor, retrieve + the documentation string from the inheritance hierarchy; + * otherwise, if *fallback_to_class_doc* is true (by default), retrieve + the documentation string from the class of the object. + Return ``None`` if the documentation string is invalid or missing. .. versionchanged:: 3.5 Documentation strings are now inherited if not overridden. + .. versionchanged:: 3.15 + Added parameters *inherit_class_doc* and *fallback_to_class_doc*. + + Documentation strings on :class:`~functools.cached_property` + objects are now inherited if not overriden. + .. function:: getcomments(object) @@ -1776,7 +1788,7 @@ Buffer flags .. _inspect-module-cli: -Command Line Interface +Command-line interface ---------------------- The :mod:`inspect` module also provides a basic introspection capability diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 12a5a96a3c5..8b4217c210d 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -183,8 +183,10 @@ Basic Usage :param bool ensure_ascii: If ``True`` (the default), the output is guaranteed to - have all incoming non-ASCII characters escaped. - If ``False``, these characters will be outputted as-is. + have all incoming non-ASCII and non-printable characters escaped. + If ``False``, all characters will be outputted as-is, except for + the characters that must be escaped: quotation mark, reverse solidus, + and the control characters U+0000 through U+001F. :param bool check_circular: If ``False``, the circular reference check for container types is skipped @@ -495,8 +497,10 @@ Encoders and Decoders :class:`bool` or ``None``. If *skipkeys* is true, such items are simply skipped. If *ensure_ascii* is true (the default), the output is guaranteed to - have all incoming non-ASCII characters escaped. If *ensure_ascii* is - false, these characters will be output as-is. + have all incoming non-ASCII and non-printable characters escaped. + If *ensure_ascii* is false, all characters will be output as-is, except for + the characters that must be escaped: quotation mark, reverse solidus, + and the control characters U+0000 through U+001F. If *check_circular* is true (the default), then lists, dicts, and custom encoded objects will be checked for circular references during encoding to @@ -636,7 +640,7 @@ UTF-32, with UTF-8 being the recommended default for maximum interoperability. As permitted, though not required, by the RFC, this module's serializer sets *ensure_ascii=True* by default, thus escaping the output so that the resulting -strings only contain ASCII characters. +strings only contain printable ASCII characters. Other than the *ensure_ascii* parameter, this module is defined strictly in terms of conversion between Python objects and diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 4824391e597..94fc046d3f3 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -524,8 +524,8 @@ The :mod:`locale` module defines the following exception and functions: SSH connections. Python doesn't internally use locale-dependent character transformation functions - from ``ctype.h``. Instead, an internal ``pyctype.h`` provides locale-independent - equivalents like :c:macro:`!Py_TOLOWER`. + from ``ctype.h``. Instead, ``pyctype.h`` provides locale-independent + equivalents like :c:macro:`Py_TOLOWER`. .. data:: LC_COLLATE diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index d74ef73ee28..c9cfbdb4126 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -463,6 +463,7 @@ timed intervals. .. method:: getFilesToDelete() Returns a list of filenames which should be deleted as part of rollover. These + are the absolute paths of the oldest backup log files written by the handler. .. method:: shouldRollover(record) diff --git a/Doc/library/lzma.rst b/Doc/library/lzma.rst index 69f7cb8d48d..8a4f68f3502 100644 --- a/Doc/library/lzma.rst +++ b/Doc/library/lzma.rst @@ -23,6 +23,8 @@ module. Note that :class:`LZMAFile` and :class:`bz2.BZ2File` are *not* thread-safe, so if you need to use a single :class:`LZMAFile` instance from multiple threads, it is necessary to protect it with a lock. +.. include:: ../includes/optional-module.rst + .. exception:: LZMAError diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index e8a96f29ea1..62e289573c0 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -917,7 +917,7 @@ Supported mailbox formats are Maildir, mbox, MH, Babyl, and MMDF. copied; furthermore, any format-specific information is converted insofar as possible if *message* is a :class:`!Message` instance. If *message* is a string, a byte string, - or a file, it should contain an :rfc:`2822`\ -compliant message, which is read + or a file, it should contain an :rfc:`5322`\ -compliant message, which is read and parsed. Files should be open in binary mode, but text mode files are accepted for backward compatibility. diff --git a/Doc/library/math.integer.rst b/Doc/library/math.integer.rst index 6a9fe74c5e8..0068ae2bdd5 100644 --- a/Doc/library/math.integer.rst +++ b/Doc/library/math.integer.rst @@ -4,7 +4,7 @@ .. module:: math.integer :synopsis: Integer-specific mathematics functions. -.. versionadded:: next +.. versionadded:: 3.15 -------------- diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 54c98346b27..d2ff74822f9 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -506,7 +506,7 @@ Summation and product functions Roughly equivalent to:: - sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q))) + sqrt(sum((px - qx) ** 2.0 for px, qx in zip(p, q, strict=True))) .. versionadded:: 3.8 @@ -781,7 +781,7 @@ the following functions from the :mod:`math.integer` module: Floats with integral values (like ``5.0``) are no longer accepted in the :func:`factorial` function. -.. deprecated:: next +.. deprecated:: 3.15 These aliases are :term:`soft deprecated` in favor of the :mod:`math.integer` functions. diff --git a/Doc/library/msvcrt.rst b/Doc/library/msvcrt.rst index 327cc3602b1..a2c5e375d2c 100644 --- a/Doc/library/msvcrt.rst +++ b/Doc/library/msvcrt.rst @@ -22,6 +22,8 @@ api. The normal API deals only with ASCII characters and is of limited use for internationalized applications. The wide char API should be used where ever possible. +.. availability:: Windows. + .. versionchanged:: 3.3 Operations in this module now raise :exc:`OSError` where :exc:`IOError` was raised. diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index d18ada3511d..92605c57527 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -22,8 +22,7 @@ to this, the :mod:`multiprocessing` module allows the programmer to fully leverage multiple processors on a given machine. It runs on both POSIX and Windows. -The :mod:`multiprocessing` module also introduces APIs which do not have -analogs in the :mod:`threading` module. A prime example of this is the +The :mod:`multiprocessing` module also introduces the :class:`~multiprocessing.pool.Pool` object which offers a convenient means of parallelizing the execution of a function across multiple input values, distributing the input data across processes (data parallelism). The following @@ -44,6 +43,10 @@ will print to standard output :: [1, 4, 9] +The :mod:`multiprocessing` module also introduces APIs which do not have +analogs in the :mod:`threading` module, like the ability to :meth:`terminate +`, :meth:`interrupt ` or :meth:`kill +` a running process. .. seealso:: @@ -829,8 +832,8 @@ raising an exception. One difference from other Python queue implementations, is that :mod:`multiprocessing` queues serializes all objects that are put into them using :mod:`pickle`. -The object return by the get method is a re-created object that does not share memory -with the original object. +The object returned by the get method is a re-created object that does not share +memory with the original object. Note that one can also create a shared queue by using a manager object -- see :ref:`multiprocessing-managers`. @@ -887,7 +890,7 @@ For an example of the usage of queues for interprocess communication see :ref:`multiprocessing-examples`. -.. function:: Pipe([duplex]) +.. function:: Pipe(duplex=True) Returns a pair ``(conn1, conn2)`` of :class:`~multiprocessing.connection.Connection` objects representing the @@ -1574,12 +1577,22 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + + .. method:: get_value() + + Return the current value of semaphore. + + Note that this may raise :exc:`NotImplementedError` on platforms like + macOS where ``sem_getvalue()`` is not implemented. + + .. method:: locked() Return a boolean indicating whether this object is locked right now. .. versionadded:: 3.14 + .. note:: On macOS, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with diff --git a/Doc/library/os.rst b/Doc/library/os.rst index d31d0ce9c85..671270d6112 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -558,7 +558,7 @@ process and user. .. function:: initgroups(username, gid, /) - Call the system initgroups() to initialize the group access list with all of + Call the system ``initgroups()`` to initialize the group access list with all of the groups of which the specified username is a member, plus the specified group id. @@ -3404,7 +3404,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. class:: statx_result @@ -3661,7 +3661,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: STATX_TYPE @@ -3690,7 +3690,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_FORCE_SYNC @@ -3700,7 +3700,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_DONT_SYNC @@ -3709,7 +3709,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_STATX_SYNC_AS_STAT @@ -3721,7 +3721,7 @@ features: .. availability:: Linux >= 4.11 with glibc >= 2.28. - .. versionadded:: next + .. versionadded:: 3.15 .. data:: AT_NO_AUTOMOUNT @@ -3733,7 +3733,7 @@ features: .. availability:: Linux. - .. versionadded:: next + .. versionadded:: 3.15 .. function:: statvfs(path) @@ -3873,6 +3873,9 @@ features: Create a symbolic link pointing to *src* named *dst*. + The *src* parameter refers to the target of the link (the file or directory being linked to), + and *dst* is the name of the link being created. + On Windows, a symlink represents either a file or a directory, and does not morph to the target dynamically. If the target is present, the type of the symlink will be created to match. Otherwise, the symlink will be created diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index 90dc6648045..0bbdc425352 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -76,6 +76,10 @@ The debugger's prompt is ``(Pdb)``, which is the indicator that you are in debug .. _pdb-cli: + +Command-line interface +---------------------- + .. program:: pdb You can also invoke :mod:`pdb` from the command line to debug other scripts. For @@ -334,7 +338,7 @@ access further features, you have to do this yourself: .. _debugger-commands: -Debugger Commands +Debugger commands ----------------- The commands recognized by the debugger are listed below. Most commands can be diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index faf8079db3d..03ad50b2c5e 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -265,6 +265,14 @@ Profile with real-time sampling statistics:: Sample all threads in the process instead of just the main thread +.. option:: --native + + Include artificial ```` frames to denote calls to non-Python code. + +.. option:: --no-gc + + Don't include artificial ```` frames to denote active garbage collection. + .. option:: --realtime-stats Print real-time sampling statistics during profiling @@ -339,79 +347,6 @@ The statistical profiler produces output similar to deterministic profilers but .. _profile-cli: -:mod:`!profiling.sampling` Module Reference -======================================================= - -.. module:: profiling.sampling - :synopsis: Python statistical profiler. - -This section documents the programmatic interface for the :mod:`!profiling.sampling` module. -For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information -about statistical profiling, see :ref:`statistical-profiling` - -.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False) - - Sample a Python process and generate profiling data. - - This is the main entry point for statistical profiling. It creates a - :class:`SampleProfiler`, collects stack traces from the target process, and - outputs the results in the specified format. - - :param int pid: Process ID of the target Python process - :param int sort: Sort order for pstats output (default: 2 for cumulative time) - :param int sample_interval_usec: Sampling interval in microseconds (default: 100) - :param int duration_sec: Duration to sample in seconds (default: 10) - :param str filename: Output filename (None for stdout/default naming) - :param bool all_threads: Whether to sample all threads (default: False) - :param int limit: Maximum number of functions to display (default: None) - :param bool show_summary: Whether to show summary statistics (default: True) - :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') - :param bool realtime_stats: Whether to display real-time statistics (default: False) - - :raises ValueError: If output_format is not 'pstats' or 'collapsed' - - Examples:: - - # Basic usage - profile process 1234 for 10 seconds - import profiling.sampling - profiling.sampling.sample(1234) - - # Profile with custom settings - profiling.sampling.sample(1234, duration_sec=30, sample_interval_usec=50, all_threads=True) - - # Generate collapsed stack traces for flamegraph.pl - profiling.sampling.sample(1234, output_format='collapsed', filename='profile.collapsed') - -.. class:: SampleProfiler(pid, sample_interval_usec, all_threads) - - Low-level API for the statistical profiler. - - This profiler uses periodic stack sampling to collect performance data - from running Python processes with minimal overhead. It can attach to - any Python process by PID and collect stack traces at regular intervals. - - :param int pid: Process ID of the target Python process - :param int sample_interval_usec: Sampling interval in microseconds - :param bool all_threads: Whether to sample all threads or just the main thread - - .. method:: sample(collector, duration_sec=10) - - Sample the target process for the specified duration. - - Collects stack traces from the target process at regular intervals - and passes them to the provided collector for processing. - - :param collector: Object that implements ``collect()`` method to process stack traces - :param int duration_sec: Duration to sample in seconds (default: 10) - - The method tracks sampling statistics and can display real-time - information if realtime_stats is enabled. - -.. seealso:: - - :ref:`sampling-profiler-cli` - Command-line interface documentation for the statistical profiler. - Deterministic Profiler Command Line Interface ============================================= diff --git a/Doc/library/pyexpat.rst b/Doc/library/pyexpat.rst index f533850c0ca..2f5db81955c 100644 --- a/Doc/library/pyexpat.rst +++ b/Doc/library/pyexpat.rst @@ -634,6 +634,15 @@ otherwise stated. .. method:: xmlparser.ExternalEntityRefHandler(context, base, systemId, publicId) + .. warning:: + + Implementing a handler that accesses local files and/or the network + may create a vulnerability to + `external entity attacks `_ + if :class:`xmlparser` is used with user-provided XML content. + Please reflect on your `threat model `_ + before implementing this handler. + Called for references to external entities. *base* is the current base, as set by a previous call to :meth:`SetBase`. The public and system identifiers, *systemId* and *publicId*, are strings if given; if the public identifier is not diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index f649fce5efc..780cc773403 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -26,6 +26,8 @@ Readline library in general. .. include:: ../includes/wasm-mobile-notavail.rst +.. include:: ../includes/optional-module.rst + .. note:: The underlying Readline library API may be implemented by @@ -244,6 +246,15 @@ Startup hooks if Python was compiled for a version of the library that supports it. +.. function:: get_pre_input_hook() + + Get the current pre-input hook function, or ``None`` if no pre-input hook + function has been set. This function only exists if Python was compiled + for a version of the library that supports it. + + .. versionadded:: next + + .. _readline-completion: Completion diff --git a/Doc/library/select.rst b/Doc/library/select.rst index e821cb01d94..62b5161fb80 100644 --- a/Doc/library/select.rst +++ b/Doc/library/select.rst @@ -115,7 +115,7 @@ The module defines the following: :ref:`kevent-objects` below for the methods supported by kevent objects. -.. function:: select(rlist, wlist, xlist[, timeout]) +.. function:: select(rlist, wlist, xlist, timeout=None) This is a straightforward interface to the Unix :c:func:`!select` system call. The first three arguments are iterables of 'waitable objects': either @@ -131,7 +131,7 @@ The module defines the following: platform-dependent. (It is known to work on Unix but not on Windows.) The optional *timeout* argument specifies a time-out in seconds; it may be a non-integer to specify fractions of seconds. - When the *timeout* argument is omitted the function blocks until + When the *timeout* argument is omitted or ``None``, the function blocks until at least one file descriptor is ready. A time-out value of zero specifies a poll and never blocks. diff --git a/Doc/library/site.rst b/Doc/library/site.rst index e98dd83b60e..d93e4dc7c75 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -270,7 +270,7 @@ Module contents .. _site-commandline: -Command Line Interface +Command-line interface ---------------------- .. program:: site diff --git a/Doc/library/smtplib.rst b/Doc/library/smtplib.rst index c5a3de52090..3ee8b82a188 100644 --- a/Doc/library/smtplib.rst +++ b/Doc/library/smtplib.rst @@ -24,10 +24,13 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions). .. class:: SMTP(host='', port=0, local_hostname=None[, timeout], source_address=None) An :class:`SMTP` instance encapsulates an SMTP connection. It has methods - that support a full repertoire of SMTP and ESMTP operations. If the optional - *host* and *port* parameters are given, the SMTP :meth:`connect` method is - called with those parameters during initialization. If specified, - *local_hostname* is used as the FQDN of the local host in the HELO/EHLO + that support a full repertoire of SMTP and ESMTP operations. + + If the host parameter is set to a truthy value, :meth:`SMTP.connect` is called with + host and port automatically when the object is created; otherwise, :meth:`!connect` must + be called manually. + + If specified, *local_hostname* is used as the FQDN of the local host in the HELO/EHLO command. Otherwise, the local hostname is found using :func:`socket.getfqdn`. If the :meth:`connect` call returns anything other than a success code, an :exc:`SMTPConnectError` is raised. The optional @@ -62,6 +65,10 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions). ``smtplib.SMTP.send`` with arguments ``self`` and ``data``, where ``data`` is the bytes about to be sent to the remote host. + .. attribute:: SMTP.default_port + + The default port used for SMTP connections (25). + .. versionchanged:: 3.3 Support for the :keyword:`with` statement was added. @@ -80,15 +87,23 @@ Protocol) and :rfc:`1869` (SMTP Service Extensions). An :class:`SMTP_SSL` instance behaves exactly the same as instances of :class:`SMTP`. :class:`SMTP_SSL` should be used for situations where SSL is - required from the beginning of the connection and using :meth:`~SMTP.starttls` - is not appropriate. If *host* is not specified, the local host is used. If - *port* is zero, the standard SMTP-over-SSL port (465) is used. The optional - arguments *local_hostname*, *timeout* and *source_address* have the same + required from the beginning of the connection and using :meth:`SMTP.starttls` is + not appropriate. + + If the host parameter is set to a truthy value, :meth:`SMTP.connect` is called with host + and port automatically when the object is created; otherwise, :meth:`!SMTP.connect` must + be called manually. + + The optional arguments *local_hostname*, *timeout* and *source_address* have the same meaning as they do in the :class:`SMTP` class. *context*, also optional, can contain a :class:`~ssl.SSLContext` and allows configuring various aspects of the secure connection. Please read :ref:`ssl-security` for best practices. + .. attribute:: SMTP_SSL.default_port + + The default port used for SMTP-over-SSL connections (465). + .. versionchanged:: 3.3 *context* was added. @@ -259,6 +274,9 @@ An :class:`SMTP` instance has the following methods: 2-tuple of the response code and message sent by the server in its connection response. + If port is not changed from its default value of 0, the value of the :attr:`default_port` + attribute is used. + .. audit-event:: smtplib.connect self,host,port smtplib.SMTP.connect diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 89bca9b5b20..b16cde34ec1 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -482,6 +482,9 @@ The AF_* and SOCK_* constants are now :class:`AddressFamily` and .. versionchanged:: 3.14 Added support for ``TCP_QUICKACK`` on Windows platforms when available. + .. versionchanged:: next + ``IPV6_HDRINCL`` was added. + .. data:: AF_CAN PF_CAN @@ -2092,11 +2095,8 @@ to sockets. Accepts any real number, not only integer or float. -.. method:: socket.setsockopt(level, optname, value: int) -.. method:: socket.setsockopt(level, optname, value: buffer) - :noindex: -.. method:: socket.setsockopt(level, optname, None, optlen: int) - :noindex: +.. method:: socket.setsockopt(level, optname, value: int | Buffer) + socket.setsockopt(level, optname, None, optlen: int) .. index:: pair: module; struct diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 7bc2f7afbbb..491b8769f44 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -546,7 +546,7 @@ The difference is that the ``readline()`` call in the second handler will call first handler had to use a ``recv()`` loop to accumulate data until a newline itself. If it had just used a single ``recv()`` without the loop it would just have returned what has been received so far from the client. -TCP is stream based: data arrives in the order it was sent, but there no +TCP is stream based: data arrives in the order it was sent, but there is no correlation between client ``send()`` or ``sendall()`` calls and the number of ``recv()`` calls on the server required to receive it. diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 9d56e81dee1..3b1a9c2f6ee 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -31,7 +31,9 @@ PostgreSQL or Oracle. The :mod:`!sqlite3` module was written by Gerhard Häring. It provides an SQL interface compliant with the DB-API 2.0 specification described by :pep:`249`, and -requires SQLite 3.15.2 or newer. +requires the third-party `SQLite `_ library. + +.. include:: ../includes/optional-module.rst This document includes four main sections: diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index e0d85c852fa..7d30094963d 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -18,8 +18,9 @@ This module provides access to Transport Layer Security (often known as "Secure Sockets Layer") encryption and peer authentication facilities for network sockets, both client-side and server-side. This module uses the OpenSSL -library. It is available on all modern Unix systems, Windows, macOS, and -probably additional platforms, as long as OpenSSL is installed on that platform. +library. + +.. include:: ../includes/optional-module.rst .. note:: @@ -2958,16 +2959,16 @@ of TLS/SSL. Some new TLS 1.3 features are not yet available. Steve Kent :rfc:`RFC 4086: Randomness Requirements for Security <4086>` - Donald E., Jeffrey I. Schiller + Donald E. Eastlake, Jeffrey I. Schiller, Steve Crocker :rfc:`RFC 5280: Internet X.509 Public Key Infrastructure Certificate and Certificate Revocation List (CRL) Profile <5280>` - D. Cooper + David Cooper et al. :rfc:`RFC 5246: The Transport Layer Security (TLS) Protocol Version 1.2 <5246>` - T. Dierks et. al. + Tim Dierks and Eric Rescorla. :rfc:`RFC 6066: Transport Layer Security (TLS) Extensions <6066>` - D. Eastlake + Donald E. Eastlake `IANA TLS: Transport Layer Security (TLS) Parameters `_ IANA diff --git a/Doc/library/stat.rst b/Doc/library/stat.rst index 1cbec3ab847..82012b31a00 100644 --- a/Doc/library/stat.rst +++ b/Doc/library/stat.rst @@ -511,4 +511,4 @@ meaning of these constants. STATX_ATTR_DAX STATX_ATTR_WRITE_ATOMIC - .. versionadded:: next + .. versionadded:: 3.15 diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 97e7e08364e..3899e5b59d8 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1994,10 +1994,16 @@ expression support in the :mod:`re` module). ``{}``. Each replacement field contains either the numeric index of a positional argument, or the name of a keyword argument. Returns a copy of the string where each replacement field is replaced with the string value of - the corresponding argument. + the corresponding argument. For example: + + .. doctest:: >>> "The sum of 1 + 2 is {0}".format(1+2) 'The sum of 1 + 2 is 3' + >>> "The sum of {a} + {b} is {answer}".format(answer=1+2, a=1, b=2) + 'The sum of 1 + 2 is 3' + >>> "{1} expects the {0} Inquisition!".format("Spanish", "Nobody") + 'Nobody expects the Spanish Inquisition!' See :ref:`formatstrings` for a description of the various formatting options that can be specified in format strings. @@ -2057,13 +2063,32 @@ expression support in the :mod:`re` module). from the `Alphabetic property defined in the section 4.10 'Letters, Alphabetic, and Ideographic' of the Unicode Standard `__. + For example: + + .. doctest:: + + >>> 'Letters and spaces'.isalpha() + False + >>> 'LettersOnly'.isalpha() + True + >>> 'µ'.isalpha() # non-ASCII characters can be considered alphabetical too + True + + See :ref:`unicode-properties`. .. method:: str.isascii() Return ``True`` if the string is empty or all characters in the string are ASCII, ``False`` otherwise. - ASCII characters have code points in the range U+0000-U+007F. + ASCII characters have code points in the range U+0000-U+007F. For example: + + .. doctest:: + + >>> 'ASCII characters'.isascii() + True + >>> 'µ'.isascii() + False .. versionadded:: 3.7 @@ -2073,9 +2098,18 @@ expression support in the :mod:`re` module). Return ``True`` if all characters in the string are decimal characters and there is at least one character, ``False`` otherwise. Decimal characters are those that can be used to form - numbers in base 10, e.g. U+0660, ARABIC-INDIC DIGIT + numbers in base 10, such as U+0660, ARABIC-INDIC DIGIT ZERO. Formally a decimal character is a character in the Unicode - General Category "Nd". + General Category "Nd". For example: + + .. doctest:: + + >>> '0123456789'.isdecimal() + True + >>> '٠١٢٣٤٥٦٧٨٩'.isdecimal() # Arabic-Indic digits zero to nine + True + >>> 'alphabetic'.isdecimal() + False .. method:: str.isdigit() @@ -2194,7 +2228,16 @@ expression support in the :mod:`re` module). Return a string which is the concatenation of the strings in *iterable*. A :exc:`TypeError` will be raised if there are any non-string values in *iterable*, including :class:`bytes` objects. The separator between - elements is the string providing this method. + elements is the string providing this method. For example: + + .. doctest:: + + >>> ', '.join(['spam', 'spam', 'spam']) + 'spam, spam, spam' + >>> '-'.join('Python') + 'P-y-t-h-o-n' + + See also :meth:`split`. .. method:: str.ljust(width, fillchar=' ', /) @@ -2408,6 +2451,8 @@ expression support in the :mod:`re` module). >>> " foo ".split(maxsplit=0) ['foo '] + See also :meth:`join`. + .. index:: single: universal newlines; str.splitlines method @@ -2611,6 +2656,8 @@ expression support in the :mod:`re` module). single: : (colon); in formatted string literal single: = (equals); for help in debugging using string literals +.. _stdtypes-fstrings: + Formatted String Literals (f-strings) ------------------------------------- @@ -2619,123 +2666,147 @@ Formatted String Literals (f-strings) The :keyword:`await` and :keyword:`async for` can be used in expressions within f-strings. .. versionchanged:: 3.8 - Added the debugging operator (``=``) + Added the debug specifier (``=``) .. versionchanged:: 3.12 Many restrictions on expressions within f-strings have been removed. Notably, nested strings, comments, and backslashes are now permitted. An :dfn:`f-string` (formally a :dfn:`formatted string literal`) is a string literal that is prefixed with ``f`` or ``F``. -This type of string literal allows embedding arbitrary Python expressions -within *replacement fields*, which are delimited by curly brackets (``{}``). -These expressions are evaluated at runtime, similarly to :meth:`str.format`, -and are converted into regular :class:`str` objects. -For example: +This type of string literal allows embedding the results of arbitrary Python +expressions within *replacement fields*, which are delimited by curly +brackets (``{}``). +Each replacement field must contain an expression, optionally followed by: -.. doctest:: +* a *debug specifier* -- an equal sign (``=``); +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). - >>> who = 'nobody' - >>> nationality = 'Spanish' - >>> f'{who.title()} expects the {nationality} Inquisition!' - 'Nobody expects the Spanish Inquisition!' +See the :ref:`Lexical Analysis section on f-strings ` for details +on the syntax of these fields. -It is also possible to use a multi line f-string: +Debug specifier +^^^^^^^^^^^^^^^ -.. doctest:: +.. versionadded:: 3.8 - >>> f'''This is a string - ... on two lines''' - 'This is a string\non two lines' +If a debug specifier -- an equal sign (``=``) -- appears after the replacement +field expression, the resulting f-string will contain the expression's source, +the equal sign, and the value of the expression. +This is often useful for debugging:: -A single opening curly bracket, ``'{'``, marks a *replacement field* that -can contain any Python expression: + >>> number = 14.3 + >>> f'{number=}' + 'number=14.3' -.. doctest:: +Whitespace before, inside and after the expression, as well as whitespace +after the equal sign, is significant --- it is retained in the result:: - >>> nationality = 'Spanish' - >>> f'The {nationality} Inquisition!' - 'The Spanish Inquisition!' + >>> f'{ number - 4 = }' + ' number - 4 = 10.3' -To include a literal ``{`` or ``}``, use a double bracket: -.. doctest:: +Conversion specifier +^^^^^^^^^^^^^^^^^^^^ - >>> x = 42 - >>> f'{{x}} is {x}' - '{x} is 42' - -Functions can also be used, and :ref:`format specifiers `: - -.. doctest:: - - >>> from math import sqrt - >>> f'√2 \N{ALMOST EQUAL TO} {sqrt(2):.5f}' - '√2 ≈ 1.41421' - -Any non-string expression is converted using :func:`str`, by default: - -.. doctest:: +By default, the value of a replacement field expression is converted to +a string using :func:`str`:: >>> from fractions import Fraction - >>> f'{Fraction(1, 3)}' + >>> one_third = Fraction(1, 3) + >>> f'{one_third}' '1/3' -To use an explicit conversion, use the ``!`` (exclamation mark) operator, -followed by any of the valid formats, which are: +When a debug specifier but no format specifier is used, the default conversion +instead uses :func:`repr`:: -========== ============== -Conversion Meaning -========== ============== -``!a`` :func:`ascii` -``!r`` :func:`repr` -``!s`` :func:`str` -========== ============== + >>> f'{one_third = }' + 'one_third = Fraction(1, 3)' -For example: +The conversion can be specified explicitly using one of these specifiers: -.. doctest:: +* ``!s`` for :func:`str` +* ``!r`` for :func:`repr` +* ``!a`` for :func:`ascii` - >>> from fractions import Fraction - >>> f'{Fraction(1, 3)!s}' +For example:: + + >>> str(one_third) '1/3' - >>> f'{Fraction(1, 3)!r}' + >>> repr(one_third) 'Fraction(1, 3)' - >>> question = '¿Dónde está el Presidente?' - >>> print(f'{question!a}') - '\xbfD\xf3nde est\xe1 el Presidente?' -While debugging it may be helpful to see both the expression and its value, -by using the equals sign (``=``) after the expression. -This preserves spaces within the brackets, and can be used with a converter. -By default, the debugging operator uses the :func:`repr` (``!r``) conversion. -For example: + >>> f'{one_third!s} is {one_third!r}' + '1/3 is Fraction(1, 3)' -.. doctest:: + >>> string = "¡kočka 😸!" + >>> ascii(string) + "'\\xa1ko\\u010dka \\U0001f638!'" + + >>> f'{string = !a}' + "string = '\\xa1ko\\u010dka \\U0001f638!'" + + +Format specifier +^^^^^^^^^^^^^^^^ + +After the expression has been evaluated, and possibly converted using an +explicit conversion specifier, it is formatted using the :func:`format` function. +If the replacement field includes a *format specifier* introduced by a colon +(``:``), the specifier is passed to :func:`!format` as the second argument. +The result of :func:`!format` is then used as the final value for the +replacement field. For example:: >>> from fractions import Fraction - >>> calculation = Fraction(1, 3) - >>> f'{calculation=}' - 'calculation=Fraction(1, 3)' - >>> f'{calculation = }' - 'calculation = Fraction(1, 3)' - >>> f'{calculation = !s}' - 'calculation = 1/3' + >>> one_third = Fraction(1, 3) + >>> f'{one_third:.6f}' + '0.333333' + >>> f'{one_third:_^+10}' + '___+1/3___' + >>> >>> f'{one_third!r:_^20}' + '___Fraction(1, 3)___' + >>> f'{one_third = :~>10}~' + 'one_third = ~~~~~~~1/3~' -Once the output has been evaluated, it can be formatted using a -:ref:`format specifier ` following a colon (``':'``). -After the expression has been evaluated, and possibly converted to a string, -the :meth:`!__format__` method of the result is called with the format specifier, -or the empty string if no format specifier is given. -The formatted result is then used as the final value for the replacement field. -For example: +.. _stdtypes-tstrings: -.. doctest:: +Template String Literals (t-strings) +------------------------------------ - >>> from fractions import Fraction - >>> f'{Fraction(1, 7):.6f}' - '0.142857' - >>> f'{Fraction(1, 7):_^+10}' - '___+1/7___' +An :dfn:`t-string` (formally a :dfn:`template string literal`) is +a string literal that is prefixed with ``t`` or ``T``. + +These strings follow the same syntax and evaluation rules as +:ref:`formatted string literals `, +with for the following differences: + +* Rather than evaluating to a ``str`` object, template string literals evaluate + to a :class:`string.templatelib.Template` object. + +* The :func:`format` protocol is not used. + Instead, the format specifier and conversions (if any) are passed to + a new :class:`~string.templatelib.Interpolation` object that is created + for each evaluated expression. + It is up to code that processes the resulting :class:`~string.templatelib.Template` + object to decide how to handle format specifiers and conversions. + +* Format specifiers containing nested replacement fields are evaluated eagerly, + prior to being passed to the :class:`~string.templatelib.Interpolation` object. + For instance, an interpolation of the form ``{amount:.{precision}f}`` will + evaluate the inner expression ``{precision}`` to determine the value of the + ``format_spec`` attribute. + If ``precision`` were to be ``2``, the resulting format specifier + would be ``'.2f'``. + +* When the equals sign ``'='`` is provided in an interpolation expression, + the text of the expression is appended to the literal string that precedes + the relevant interpolation. + This includes the equals sign and any surrounding whitespace. + The :class:`!Interpolation` instance for the expression will be created as + normal, except that :attr:`~string.templatelib.Interpolation.conversion` will + be set to '``r``' (:func:`repr`) by default. + If an explicit conversion or format specifier are provided, + this will override the default behaviour. .. _old-string-formatting: @@ -3173,6 +3244,30 @@ objects. .. versionadded:: 3.14 + .. method:: take_bytes(n=None, /) + + Remove the first *n* bytes from the bytearray and return them as an immutable + :class:`bytes`. + By default (if *n* is ``None``), return all bytes and clear the bytearray. + + If *n* is negative, index from the end and take the first :func:`len` + plus *n* bytes. If *n* is out of bounds, raise :exc:`IndexError`. + + Taking less than the full length will leave remaining bytes in the + :class:`bytearray`, which requires a copy. If the remaining bytes should be + discarded, use :func:`~bytearray.resize` or :keyword:`del` to truncate + then :func:`~bytearray.take_bytes` without a size. + + .. impl-detail:: + + Taking all bytes is a zero-copy operation. + + .. versionadded:: 3.15 + + See the :ref:`What's New ` entry for + common code patterns which can be optimized with + :meth:`bytearray.take_bytes`. + Since bytearray objects are sequences of integers (akin to a list), for a bytearray object *b*, ``b[0]`` will be an integer, while ``b[0:1]`` will be a bytearray object of length 1. (This contrasts with text strings, where @@ -4580,7 +4675,7 @@ copying. .. versionadded:: 3.14 - .. method:: index(value, start=0, stop=sys.maxsize, /) + .. method:: index(value, start=0, stop=sys.maxsize, /) Return the index of the first occurrence of *value* (at or after index *start* and before index *stop*). @@ -4731,11 +4826,12 @@ other sequence-like behavior. There are currently two built-in set types, :class:`set` and :class:`frozenset`. The :class:`set` type is mutable --- the contents can be changed using methods -like :meth:`~set.add` and :meth:`~set.remove`. Since it is mutable, it has no -hash value and cannot be used as either a dictionary key or as an element of -another set. The :class:`frozenset` type is immutable and :term:`hashable` --- -its contents cannot be altered after it is created; it can therefore be used as -a dictionary key or as an element of another set. +like :meth:`~set.add` and :meth:`~set.remove`. +Since it is mutable, it has no hash value and cannot be used as +either a dictionary key or as an element of another set. +The :class:`frozenset` type is immutable and :term:`hashable` --- +its contents cannot be altered after it is created; +it can therefore be used as a dictionary key or as an element of another set. Non-empty sets (not frozensets) can be created by placing a comma-separated list of elements within braces, for example: ``{'jack', 'sjoerd'}``, in addition to the @@ -4752,164 +4848,172 @@ The constructors for both classes work the same: objects. If *iterable* is not specified, a new empty set is returned. - Sets can be created by several means: +Sets can be created by several means: - * Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` - * Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` - * Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` +* Use a comma-separated list of elements within braces: ``{'jack', 'sjoerd'}`` +* Use a set comprehension: ``{c for c in 'abracadabra' if c not in 'abc'}`` +* Use the type constructor: ``set()``, ``set('foobar')``, ``set(['a', 'b', 'foo'])`` - Instances of :class:`set` and :class:`frozenset` provide the following - operations: +Instances of :class:`set` and :class:`frozenset` provide the following +operations: - .. describe:: len(s) +.. describe:: len(s) - Return the number of elements in set *s* (cardinality of *s*). + Return the number of elements in set *s* (cardinality of *s*). - .. describe:: x in s +.. describe:: x in s - Test *x* for membership in *s*. + Test *x* for membership in *s*. - .. describe:: x not in s +.. describe:: x not in s - Test *x* for non-membership in *s*. + Test *x* for non-membership in *s*. - .. method:: isdisjoint(other, /) +.. method:: frozenset.isdisjoint(other, /) + set.isdisjoint(other, /) - Return ``True`` if the set has no elements in common with *other*. Sets are - disjoint if and only if their intersection is the empty set. + Return ``True`` if the set has no elements in common with *other*. Sets are + disjoint if and only if their intersection is the empty set. - .. method:: issubset(other, /) - set <= other +.. method:: frozenset.issubset(other, /) + set.issubset(other, /) +.. describe:: set <= other - Test whether every element in the set is in *other*. + Test whether every element in the set is in *other*. - .. method:: set < other +.. describe:: set < other - Test whether the set is a proper subset of *other*, that is, - ``set <= other and set != other``. + Test whether the set is a proper subset of *other*, that is, + ``set <= other and set != other``. - .. method:: issuperset(other, /) - set >= other +.. method:: frozenset.issuperset(other, /) + set.issuperset(other, /) +.. describe:: set >= other - Test whether every element in *other* is in the set. + Test whether every element in *other* is in the set. - .. method:: set > other +.. describe:: set > other - Test whether the set is a proper superset of *other*, that is, ``set >= - other and set != other``. + Test whether the set is a proper superset of *other*, that is, ``set >= + other and set != other``. - .. method:: union(*others) - set | other | ... +.. method:: frozenset.union(*others) + set.union(*others) +.. describe:: set | other | ... - Return a new set with elements from the set and all others. + Return a new set with elements from the set and all others. - .. method:: intersection(*others) - set & other & ... +.. method:: frozenset.intersection(*others) + set.intersection(*others) +.. describe:: set & other & ... - Return a new set with elements common to the set and all others. + Return a new set with elements common to the set and all others. - .. method:: difference(*others) - set - other - ... +.. method:: frozenset.difference(*others) + set.difference(*others) +.. describe:: set - other - ... - Return a new set with elements in the set that are not in the others. + Return a new set with elements in the set that are not in the others. - .. method:: symmetric_difference(other, /) - set ^ other +.. method:: frozenset.symmetric_difference(other, /) + set.symmetric_difference(other, /) +.. describe:: set ^ other - Return a new set with elements in either the set or *other* but not both. + Return a new set with elements in either the set or *other* but not both. - .. method:: copy() +.. method:: frozenset.copy() + set.copy() - Return a shallow copy of the set. + Return a shallow copy of the set. - Note, the non-operator versions of :meth:`union`, :meth:`intersection`, - :meth:`difference`, :meth:`symmetric_difference`, :meth:`issubset`, and - :meth:`issuperset` methods will accept any iterable as an argument. In - contrast, their operator based counterparts require their arguments to be - sets. This precludes error-prone constructions like ``set('abc') & 'cbs'`` - in favor of the more readable ``set('abc').intersection('cbs')``. +Note, the non-operator versions of :meth:`~frozenset.union`, +:meth:`~frozenset.intersection`, :meth:`~frozenset.difference`, :meth:`~frozenset.symmetric_difference`, :meth:`~frozenset.issubset`, and +:meth:`~frozenset.issuperset` methods will accept any iterable as an argument. In +contrast, their operator based counterparts require their arguments to be +sets. This precludes error-prone constructions like ``set('abc') & 'cbs'`` +in favor of the more readable ``set('abc').intersection('cbs')``. - Both :class:`set` and :class:`frozenset` support set to set comparisons. Two - sets are equal if and only if every element of each set is contained in the - other (each is a subset of the other). A set is less than another set if and - only if the first set is a proper subset of the second set (is a subset, but - is not equal). A set is greater than another set if and only if the first set - is a proper superset of the second set (is a superset, but is not equal). +Both :class:`set` and :class:`frozenset` support set to set comparisons. Two +sets are equal if and only if every element of each set is contained in the +other (each is a subset of the other). A set is less than another set if and +only if the first set is a proper subset of the second set (is a subset, but +is not equal). A set is greater than another set if and only if the first set +is a proper superset of the second set (is a superset, but is not equal). - Instances of :class:`set` are compared to instances of :class:`frozenset` - based on their members. For example, ``set('abc') == frozenset('abc')`` - returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. +Instances of :class:`set` are compared to instances of :class:`frozenset` +based on their members. For example, ``set('abc') == frozenset('abc')`` +returns ``True`` and so does ``set('abc') in set([frozenset('abc')])``. - The subset and equality comparisons do not generalize to a total ordering - function. For example, any two nonempty disjoint sets are not equal and are not - subsets of each other, so *all* of the following return ``False``: ``ab``. +The subset and equality comparisons do not generalize to a total ordering +function. For example, any two nonempty disjoint sets are not equal and are not +subsets of each other, so *all* of the following return ``False``: ``ab``. - Since sets only define partial ordering (subset relationships), the output of - the :meth:`list.sort` method is undefined for lists of sets. +Since sets only define partial ordering (subset relationships), the output of +the :meth:`list.sort` method is undefined for lists of sets. - Set elements, like dictionary keys, must be :term:`hashable`. +Set elements, like dictionary keys, must be :term:`hashable`. - Binary operations that mix :class:`set` instances with :class:`frozenset` - return the type of the first operand. For example: ``frozenset('ab') | - set('bc')`` returns an instance of :class:`frozenset`. +Binary operations that mix :class:`set` instances with :class:`frozenset` +return the type of the first operand. For example: ``frozenset('ab') | +set('bc')`` returns an instance of :class:`frozenset`. - The following table lists operations available for :class:`set` that do not - apply to immutable instances of :class:`frozenset`: +The following table lists operations available for :class:`set` that do not +apply to immutable instances of :class:`frozenset`: - .. method:: update(*others) - set |= other | ... +.. method:: set.update(*others) +.. describe:: set |= other | ... - Update the set, adding elements from all others. + Update the set, adding elements from all others. - .. method:: intersection_update(*others) - set &= other & ... +.. method:: set.intersection_update(*others) +.. describe:: set &= other & ... - Update the set, keeping only elements found in it and all others. + Update the set, keeping only elements found in it and all others. - .. method:: difference_update(*others) - set -= other | ... +.. method:: set.difference_update(*others) +.. describe:: set -= other | ... - Update the set, removing elements found in others. + Update the set, removing elements found in others. - .. method:: symmetric_difference_update(other, /) - set ^= other +.. method:: set.symmetric_difference_update(other, /) +.. describe:: set ^= other - Update the set, keeping only elements found in either set, but not in both. + Update the set, keeping only elements found in either set, but not in both. - .. method:: add(elem, /) +.. method:: set.add(elem, /) - Add element *elem* to the set. + Add element *elem* to the set. - .. method:: remove(elem, /) +.. method:: set.remove(elem, /) - Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is - not contained in the set. + Remove element *elem* from the set. Raises :exc:`KeyError` if *elem* is + not contained in the set. - .. method:: discard(elem, /) +.. method:: set.discard(elem, /) - Remove element *elem* from the set if it is present. + Remove element *elem* from the set if it is present. - .. method:: pop() +.. method:: set.pop() - Remove and return an arbitrary element from the set. Raises - :exc:`KeyError` if the set is empty. + Remove and return an arbitrary element from the set. Raises + :exc:`KeyError` if the set is empty. - .. method:: clear() +.. method:: set.clear() - Remove all elements from the set. + Remove all elements from the set. - Note, the non-operator versions of the :meth:`update`, - :meth:`intersection_update`, :meth:`difference_update`, and - :meth:`symmetric_difference_update` methods will accept any iterable as an - argument. +Note, the non-operator versions of the :meth:`~set.update`, +:meth:`~set.intersection_update`, :meth:`~set.difference_update`, and +:meth:`~set.symmetric_difference_update` methods will accept any iterable as an +argument. - Note, the *elem* argument to the :meth:`~object.__contains__`, - :meth:`remove`, and - :meth:`discard` methods may be a set. To support searching for an equivalent - frozenset, a temporary one is created from *elem*. +Note, the *elem* argument to the :meth:`~object.__contains__`, +:meth:`~set.remove`, and +:meth:`~set.discard` methods may be a set. To support searching for an equivalent +frozenset, a temporary one is created from *elem*. .. _typesmapping: diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 6336a0ec47b..58c836c7382 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -4,7 +4,7 @@ .. module:: string :synopsis: Common string operations. -**Source code:** :source:`Lib/string.py` +**Source code:** :source:`Lib/string/__init__.py` -------------- diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 1aade881745..b8dfcc31077 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -831,7 +831,9 @@ Instances of the :class:`Popen` class have the following methods: If the process does not terminate after *timeout* seconds, a :exc:`TimeoutExpired` exception will be raised. Catching this exception and - retrying communication will not lose any output. + retrying communication will not lose any output. Supplying *input* to a + subsequent post-timeout :meth:`communicate` call is in undefined behavior + and may become an error in the future. The child process is not killed if the timeout expires, so in order to cleanup properly a well-behaved application should kill the child process and @@ -844,6 +846,11 @@ Instances of the :class:`Popen` class have the following methods: proc.kill() outs, errs = proc.communicate() + After a call to :meth:`~Popen.communicate` raises :exc:`TimeoutExpired`, do + not call :meth:`~Popen.wait`. Use an additional :meth:`~Popen.communicate` + call to finish handling pipes and populate the :attr:`~Popen.returncode` + attribute. + .. note:: The data read is buffered in memory, so do not use this method if the data diff --git a/Doc/library/symtable.rst b/Doc/library/symtable.rst index 54e19af4bd6..f5e6f9f8acf 100644 --- a/Doc/library/symtable.rst +++ b/Doc/library/symtable.rst @@ -21,11 +21,17 @@ tables. Generating Symbol Tables ------------------------ -.. function:: symtable(code, filename, compile_type) +.. function:: symtable(code, filename, compile_type, *, module=None) Return the toplevel :class:`SymbolTable` for the Python source *code*. *filename* is the name of the file containing the code. *compile_type* is like the *mode* argument to :func:`compile`. + The optional argument *module* specifies the module name. + It is needed to unambiguous :ref:`filter ` syntax warnings + by module name. + + .. versionadded:: 3.15 + Added the *module* parameter. Examining Symbol Tables diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 0f986aa580b..303655fb128 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -216,14 +216,17 @@ by another event: The :monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events are controlled by the :monitoring-event:`CALL` event. -:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be seen if the -corresponding :monitoring-event:`CALL` event is being monitored. +:monitoring-event:`C_RETURN` and :monitoring-event:`C_RAISE` events will only be +seen if the corresponding :monitoring-event:`CALL` event is being monitored. + + +.. _monitoring-event-global: Other events '''''''''''' Other events are not necessarily tied to a specific location in the -program and cannot be individually disabled. +program and cannot be individually disabled via :data:`DISABLE`. The other events that can be monitored are: @@ -289,12 +292,13 @@ in Python (see :ref:`c-api-monitoring`). .. function:: get_local_events(tool_id: int, code: CodeType, /) -> int - Returns all the local events for *code* + Returns all the :ref:`local events ` for *code* .. function:: set_local_events(tool_id: int, code: CodeType, event_set: int, /) -> None - Activates all the local events for *code* which are set in *event_set*. - Raises a :exc:`ValueError` if *tool_id* is not in use. + Activates all the :ref:`local events ` for *code* + which are set in *event_set*. Raises a :exc:`ValueError` if *tool_id* is not + in use. Disabling events @@ -305,15 +309,21 @@ Disabling events A special value that can be returned from a callback function to disable events for the current code location. -Local events can be disabled for a specific code location by returning -:data:`sys.monitoring.DISABLE` from a callback function. This does not change -which events are set, or any other code locations for the same event. +:ref:`Local events ` can be disabled for a specific code +location by returning :data:`sys.monitoring.DISABLE` from a callback function. +This does not change which events are set, or any other code locations for the +same event. Disabling events for specific locations is very important for high performance monitoring. For example, a program can be run under a debugger with no overhead if the debugger disables all monitoring except for a few breakpoints. +If :data:`DISABLE` is returned by a callback for a +:ref:`global event `, :exc:`ValueError` will be raised +by the interpreter in a non-specific location (that is, no traceback will be +provided). + .. function:: restart_events() -> None Enable all the events that were disabled by :data:`sys.monitoring.DISABLE` diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 843b8129c73..f8e99bc7d24 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1213,10 +1213,14 @@ always available. Unless explicitly noted otherwise, all variables are read-only The size of the seed key of the hash algorithm + .. attribute:: hash_info.cutoff + + Cutoff for small string DJBX33A optimization in range ``[1, cutoff)``. + .. versionadded:: 3.2 .. versionchanged:: 3.4 - Added *algorithm*, *hash_bits* and *seed_bits* + Added *algorithm*, *hash_bits*, *seed_bits*, and *cutoff*. .. data:: hexversion diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index c4614bf28a4..5ff8502bbe2 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -21,6 +21,14 @@ Some facts and figures: * reads and writes :mod:`gzip`, :mod:`bz2`, :mod:`compression.zstd`, and :mod:`lzma` compressed archives if the respective modules are available. + .. + The following paragraph should be similar to ../includes/optional-module.rst + + If any of these :term:`optional modules ` are missing from + your copy of CPython, look for documentation from your distributor (that is, + whoever provided Python to you). + If you are the distributor, see :ref:`optional-module-requirements`. + * read/write support for the POSIX.1-1988 (ustar) format. * read/write support for the GNU tar format including *longname* and *longlink* diff --git a/Doc/library/time.rst b/Doc/library/time.rst index 350ffade7af..a931134331f 100644 --- a/Doc/library/time.rst +++ b/Doc/library/time.rst @@ -407,9 +407,9 @@ Functions On Windows, if *secs* is zero, the thread relinquishes the remainder of its time slice to any other thread that is ready to run. If there are no other threads ready to run, the function returns immediately, and the thread - continues execution. On Windows 8.1 and newer the implementation uses + continues execution. On Windows 10 and newer the implementation uses a `high-resolution timer - `_ + `_ which provides resolution of 100 nanoseconds. If *secs* is zero, ``Sleep(0)`` is used. .. rubric:: Unix implementation @@ -584,7 +584,7 @@ Functions calculations when the day of the week and the year are specified. Here is an example, a format for dates compatible with that specified in the - :rfc:`2822` Internet email standard. [1]_ :: + :rfc:`5322` Internet email standard. [1]_ :: >>> from time import gmtime, strftime >>> strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) @@ -1066,4 +1066,5 @@ Timezone Constants strict reading of the original 1982 :rfc:`822` standard calls for a two-digit year (``%y`` rather than ``%Y``), but practice moved to 4-digit years long before the year 2000. After that, :rfc:`822` became obsolete and the 4-digit year has - been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`. + been first recommended by :rfc:`1123` and then mandated by :rfc:`2822`, + with :rfc:`5322` continuing this requirement. diff --git a/Doc/library/tkinter.rst b/Doc/library/tkinter.rst index 22e08c45d01..81177533be8 100644 --- a/Doc/library/tkinter.rst +++ b/Doc/library/tkinter.rst @@ -36,6 +36,8 @@ details that are unchanged. Most documentation you will find online still uses the old API and can be woefully outdated. +.. include:: ../includes/optional-module.rst + .. seealso:: * `TkDocs `_ diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index b687231bd48..95a57c57e71 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -29,6 +29,8 @@ introduced in Logo `_, developed by Wally Feurzeig, Seymour Papert and Cynthia Solomon in 1967. +.. include:: ../includes/optional-module.rst + Get started =========== @@ -2799,68 +2801,68 @@ The demo scripts are: .. tabularcolumns:: |l|L|L| -+----------------+------------------------------+-----------------------+ -| Name | Description | Features | -+================+==============================+=======================+ -| bytedesign | complex classical | :func:`tracer`, delay,| -| | turtle graphics pattern | :func:`update` | -+----------------+------------------------------+-----------------------+ -| chaos | graphs Verhulst dynamics, | world coordinates | -| | shows that computer's | | -| | computations can generate | | -| | results sometimes against the| | -| | common sense expectations | | -+----------------+------------------------------+-----------------------+ -| clock | analog clock showing time | turtles as clock's | -| | of your computer | hands, ontimer | -+----------------+------------------------------+-----------------------+ -| colormixer | experiment with r, g, b | :func:`ondrag` | -+----------------+------------------------------+-----------------------+ -| forest | 3 breadth-first trees | randomization | -+----------------+------------------------------+-----------------------+ -| fractalcurves | Hilbert & Koch curves | recursion | -+----------------+------------------------------+-----------------------+ -| lindenmayer | ethnomathematics | L-System | -| | (indian kolams) | | -+----------------+------------------------------+-----------------------+ -| minimal_hanoi | Towers of Hanoi | Rectangular Turtles | -| | | as Hanoi discs | -| | | (shape, shapesize) | -+----------------+------------------------------+-----------------------+ -| nim | play the classical nim game | turtles as nimsticks, | -| | with three heaps of sticks | event driven (mouse, | -| | against the computer. | keyboard) | -+----------------+------------------------------+-----------------------+ -| paint | super minimalistic | :func:`onclick` | -| | drawing program | | -+----------------+------------------------------+-----------------------+ -| peace | elementary | turtle: appearance | -| | | and animation | -+----------------+------------------------------+-----------------------+ -| penrose | aperiodic tiling with | :func:`stamp` | -| | kites and darts | | -+----------------+------------------------------+-----------------------+ -| planet_and_moon| simulation of | compound shapes, | -| | gravitational system | :class:`Vec2D` | -+----------------+------------------------------+-----------------------+ -| rosette | a pattern from the wikipedia | :func:`clone`, | -| | article on turtle graphics | :func:`undo` | -+----------------+------------------------------+-----------------------+ -| round_dance | dancing turtles rotating | compound shapes, clone| -| | pairwise in opposite | shapesize, tilt, | -| | direction | get_shapepoly, update | -+----------------+------------------------------+-----------------------+ -| sorting_animate| visual demonstration of | simple alignment, | -| | different sorting methods | randomization | -+----------------+------------------------------+-----------------------+ -| tree | a (graphical) breadth | :func:`clone` | -| | first tree (using generators)| | -+----------------+------------------------------+-----------------------+ -| two_canvases | simple design | turtles on two | -| | | canvases | -+----------------+------------------------------+-----------------------+ -| yinyang | another elementary example | :func:`circle` | -+----------------+------------------------------+-----------------------+ ++------------------------+------------------------------+--------------------------------------+ +| Name | Description | Features | ++========================+==============================+======================================+ +| ``bytedesign`` | complex classical | :func:`tracer`, :func:`delay`, | +| | turtle graphics pattern | :func:`update` | ++------------------------+------------------------------+--------------------------------------+ +| ``chaos`` | graphs Verhulst dynamics, | world coordinates | +| | shows that computer's | | +| | computations can generate | | +| | results sometimes against the| | +| | common sense expectations | | ++------------------------+------------------------------+--------------------------------------+ +| ``clock`` | analog clock showing time | turtles as clock's | +| | of your computer | hands, :func:`ontimer` | ++------------------------+------------------------------+--------------------------------------+ +| ``colormixer`` | experiment with r, g, b | :func:`ondrag` | ++------------------------+------------------------------+--------------------------------------+ +| ``forest`` | 3 breadth-first trees | randomization | ++------------------------+------------------------------+--------------------------------------+ +| ``fractalcurves`` | Hilbert & Koch curves | recursion | ++------------------------+------------------------------+--------------------------------------+ +| ``lindenmayer`` | ethnomathematics | L-System | +| | (indian kolams) | | ++------------------------+------------------------------+--------------------------------------+ +| ``minimal_hanoi`` | Towers of Hanoi | Rectangular Turtles | +| | | as Hanoi discs | +| | | (:func:`shape`, :func:`shapesize`) | ++------------------------+------------------------------+--------------------------------------+ +| ``nim`` | play the classical nim game | turtles as nimsticks, | +| | with three heaps of sticks | event driven (mouse, | +| | against the computer. | keyboard) | ++------------------------+------------------------------+--------------------------------------+ +| ``paint`` | super minimalistic | :func:`onclick` | +| | drawing program | | ++------------------------+------------------------------+--------------------------------------+ +| ``peace`` | elementary | turtle: appearance | +| | | and animation | ++------------------------+------------------------------+--------------------------------------+ +| ``penrose`` | aperiodic tiling with | :func:`stamp` | +| | kites and darts | | ++------------------------+------------------------------+--------------------------------------+ +| ``planet_and_moon`` | simulation of | compound shapes, | +| | gravitational system | :class:`Vec2D` | ++------------------------+------------------------------+--------------------------------------+ +| ``rosette`` | a pattern from the wikipedia | :func:`clone`, | +| | article on turtle graphics | :func:`undo` | ++------------------------+------------------------------+--------------------------------------+ +| ``round_dance`` | dancing turtles rotating | compound shapes, :func:`clone` | +| | pairwise in opposite | :func:`shapesize`, :func:`tilt`, | +| | direction | :func:`get_shapepoly`, :func:`update`| ++------------------------+------------------------------+--------------------------------------+ +| ``sorting_animate`` | visual demonstration of | simple alignment, | +| | different sorting methods | randomization | ++------------------------+------------------------------+--------------------------------------+ +| ``tree`` | a (graphical) breadth | :func:`clone` | +| | first tree (using generators)| | ++------------------------+------------------------------+--------------------------------------+ +| ``two_canvases`` | simple design | turtles on two | +| | | canvases | ++------------------------+------------------------------+--------------------------------------+ +| ``yinyang`` | another elementary example | :func:`circle` | ++------------------------+------------------------------+--------------------------------------+ Have fun! diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst index fd5f56bd7ea..34f21f49b4b 100644 --- a/Doc/library/unicodedata.rst +++ b/Doc/library/unicodedata.rst @@ -156,7 +156,7 @@ following functions: >>> unicodedata.isxidstart('0') False - .. versionadded:: next + .. versionadded:: 3.15 .. function:: isxidcontinue(chr, /) @@ -171,7 +171,7 @@ following functions: >>> unicodedata.isxidcontinue(' ') False - .. versionadded:: next + .. versionadded:: 3.15 .. function:: decomposition(chr, /) diff --git a/Doc/library/unittest.mock-examples.rst b/Doc/library/unittest.mock-examples.rst index 00cc9bfc0a5..61c75b5a03b 100644 --- a/Doc/library/unittest.mock-examples.rst +++ b/Doc/library/unittest.mock-examples.rst @@ -600,13 +600,13 @@ this list of calls for us:: Partial mocking ~~~~~~~~~~~~~~~ -In some tests I wanted to mock out a call to :meth:`datetime.date.today` -to return a known date, but I didn't want to prevent the code under test from -creating new date objects. Unfortunately :class:`datetime.date` is written in C, and -so I couldn't just monkey-patch out the static :meth:`datetime.date.today` method. +For some tests, you may want to mock out a call to :meth:`datetime.date.today` +to return a known date, but don't want to prevent the code under test from +creating new date objects. Unfortunately :class:`datetime.date` is written in C, +so you cannot just monkey-patch out the static :meth:`datetime.date.today` method. -I found a simple way of doing this that involved effectively wrapping the date -class with a mock, but passing through calls to the constructor to the real +Instead, you can effectively wrap the date +class with a mock, while passing through calls to the constructor to the real class (and returning real instances). The :func:`patch decorator ` is used here to @@ -743,16 +743,15 @@ exception is raised in the setUp then tearDown is not called. Mocking Unbound Methods ~~~~~~~~~~~~~~~~~~~~~~~ -Whilst writing tests today I needed to patch an *unbound method* (patching the -method on the class rather than on the instance). I needed self to be passed -in as the first argument because I want to make asserts about which objects -were calling this particular method. The issue is that you can't patch with a -mock for this, because if you replace an unbound method with a mock it doesn't -become a bound method when fetched from the instance, and so it doesn't get -self passed in. The workaround is to patch the unbound method with a real -function instead. The :func:`patch` decorator makes it so simple to -patch out methods with a mock that having to create a real function becomes a -nuisance. +Sometimes a test needs to patch an *unbound method*, which means patching the +method on the class rather than on the instance. In order to make assertions +about which objects were calling this particular method, you need to pass +``self`` as the first argument. The issue is that you can't patch with a mock for +this, because if you replace an unbound method with a mock it doesn't become +a bound method when fetched from the instance, and so it doesn't get ``self`` +passed in. The workaround is to patch the unbound method with a real function +instead. The :func:`patch` decorator makes it so simple to patch out methods +with a mock that having to create a real function becomes a nuisance. If you pass ``autospec=True`` to patch then it does the patching with a *real* function object. This function object has the same signature as the one @@ -760,8 +759,8 @@ it is replacing, but delegates to a mock under the hood. You still get your mock auto-created in exactly the same way as before. What it means though, is that if you use it to patch out an unbound method on a class the mocked function will be turned into a bound method if it is fetched from an instance. -It will have ``self`` passed in as the first argument, which is exactly what I -wanted: +It will have ``self`` passed in as the first argument, which is exactly what +was needed: >>> class Foo: ... def foo(self): @@ -864,9 +863,9 @@ Here's one solution that uses the :attr:`~Mock.side_effect` functionality. If you provide a ``side_effect`` function for a mock then ``side_effect`` will be called with the same args as the mock. This gives us an opportunity to copy the arguments and store them for later assertions. In this -example I'm using *another* mock to store the arguments so that I can use the +example we're using *another* mock to store the arguments so that we can use the mock methods for doing the assertion. Again a helper function sets this up for -me. :: +us. :: >>> from copy import deepcopy >>> from unittest.mock import Mock, patch, DEFAULT diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index fe45becce2e..0bc0a953fd9 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -438,7 +438,7 @@ run whether the test method succeeded or not. Such a working environment for the testing code is called a :dfn:`test fixture`. A new TestCase instance is created as a unique test fixture used to execute each individual test method. Thus -:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`~TestCase.__init__` +:meth:`~TestCase.setUp`, :meth:`~TestCase.tearDown`, and :meth:`!TestCase.__init__` will be called once per test. It is recommended that you use TestCase implementations to group tests together @@ -518,7 +518,7 @@ set-up and tear-down methods:: subclasses will make future test refactorings infinitely easier. In some cases, the existing tests may have been written using the :mod:`doctest` -module. If so, :mod:`doctest` provides a :class:`DocTestSuite` class that can +module. If so, :mod:`doctest` provides a :class:`~doctest.DocTestSuite` class that can automatically build :class:`unittest.TestSuite` instances from the existing :mod:`doctest`\ -based tests. @@ -1023,7 +1023,7 @@ Test cases additional keyword argument *msg*. The context manager will store the caught exception object in its - :attr:`exception` attribute. This can be useful if the intention + :attr:`!exception` attribute. This can be useful if the intention is to perform additional checks on the exception raised:: with self.assertRaises(SomeException) as cm: @@ -1036,7 +1036,7 @@ Test cases Added the ability to use :meth:`assertRaises` as a context manager. .. versionchanged:: 3.2 - Added the :attr:`exception` attribute. + Added the :attr:`!exception` attribute. .. versionchanged:: 3.3 Added the *msg* keyword argument when used as a context manager. @@ -1089,8 +1089,8 @@ Test cases additional keyword argument *msg*. The context manager will store the caught warning object in its - :attr:`warning` attribute, and the source line which triggered the - warnings in the :attr:`filename` and :attr:`lineno` attributes. + :attr:`!warning` attribute, and the source line which triggered the + warnings in the :attr:`!filename` and :attr:`!lineno` attributes. This can be useful if the intention is to perform additional checks on the warning caught:: @@ -1437,7 +1437,7 @@ Test cases that lists the differences between the sets. This method is used by default when comparing sets or frozensets with :meth:`assertEqual`. - Fails if either of *first* or *second* does not have a :meth:`set.difference` + Fails if either of *first* or *second* does not have a :meth:`~frozenset.difference` method. .. versionadded:: 3.1 @@ -1645,7 +1645,7 @@ Test cases .. method:: asyncSetUp() :async: - Method called to prepare the test fixture. This is called after :meth:`setUp`. + Method called to prepare the test fixture. This is called after :meth:`TestCase.setUp`. This is called immediately before calling the test method; other than :exc:`AssertionError` or :exc:`SkipTest`, any exception raised by this method will be considered an error rather than a test failure. The default implementation @@ -1655,7 +1655,7 @@ Test cases :async: Method called immediately after the test method has been called and the - result recorded. This is called before :meth:`tearDown`. This is called even if + result recorded. This is called before :meth:`~TestCase.tearDown`. This is called even if the test method raised an exception, so the implementation in subclasses may need to be particularly careful about checking internal state. Any exception, other than :exc:`AssertionError` or :exc:`SkipTest`, raised by this method will be @@ -1684,7 +1684,7 @@ Test cases Sets up a new event loop to run the test, collecting the result into the :class:`TestResult` object passed as *result*. If *result* is omitted or ``None``, a temporary result object is created (by calling - the :meth:`defaultTestResult` method) and used. The result object is + the :meth:`~TestCase.defaultTestResult` method) and used. The result object is returned to :meth:`run`'s caller. At the end of the test all the tasks in the event loop are cancelled. @@ -1805,7 +1805,7 @@ Grouping tests returned by repeated iterations before :meth:`TestSuite.run` must be the same for each call iteration. After :meth:`TestSuite.run`, callers should not rely on the tests returned by this method unless the caller uses a - subclass that overrides :meth:`TestSuite._removeTestAtIndex` to preserve + subclass that overrides :meth:`!TestSuite._removeTestAtIndex` to preserve test references. .. versionchanged:: 3.2 @@ -1816,10 +1816,10 @@ Grouping tests .. versionchanged:: 3.4 In earlier versions the :class:`TestSuite` held references to each :class:`TestCase` after :meth:`TestSuite.run`. Subclasses can restore - that behavior by overriding :meth:`TestSuite._removeTestAtIndex`. + that behavior by overriding :meth:`!TestSuite._removeTestAtIndex`. In the typical usage of a :class:`TestSuite` object, the :meth:`run` method - is invoked by a :class:`TestRunner` rather than by the end-user test harness. + is invoked by a :class:`!TestRunner` rather than by the end-user test harness. Loading and running tests @@ -1853,12 +1853,12 @@ Loading and running tests .. method:: loadTestsFromTestCase(testCaseClass) Return a suite of all test cases contained in the :class:`TestCase`\ -derived - :class:`testCaseClass`. + :class:`!testCaseClass`. A test case instance is created for each method named by :meth:`getTestCaseNames`. By default these are the method names beginning with ``test``. If :meth:`getTestCaseNames` returns no - methods, but the :meth:`runTest` method is implemented, a single test + methods, but the :meth:`!runTest` method is implemented, a single test case is created for that method instead. @@ -1905,13 +1905,13 @@ Loading and running tests case class will be picked up as "a test method within a test case class", rather than "a callable object". - For example, if you have a module :mod:`SampleTests` containing a - :class:`TestCase`\ -derived class :class:`SampleTestCase` with three test - methods (:meth:`test_one`, :meth:`test_two`, and :meth:`test_three`), the + For example, if you have a module :mod:`!SampleTests` containing a + :class:`TestCase`\ -derived class :class:`!SampleTestCase` with three test + methods (:meth:`!test_one`, :meth:`!test_two`, and :meth:`!test_three`), the specifier ``'SampleTests.SampleTestCase'`` would cause this method to return a suite which will run all three test methods. Using the specifier ``'SampleTests.SampleTestCase.test_two'`` would cause it to return a test - suite which will run only the :meth:`test_two` test method. The specifier + suite which will run only the :meth:`!test_two` test method. The specifier can refer to modules and packages which have not been imported; they will be imported as a side-effect. @@ -2058,7 +2058,7 @@ Loading and running tests Testing frameworks built on top of :mod:`unittest` may want access to the :class:`TestResult` object generated by running a set of tests for reporting purposes; a :class:`TestResult` instance is returned by the - :meth:`TestRunner.run` method for this purpose. + :meth:`!TestRunner.run` method for this purpose. :class:`TestResult` instances have the following attributes that will be of interest when inspecting the results of running a set of tests: @@ -2144,12 +2144,12 @@ Loading and running tests This method can be called to signal that the set of tests being run should be aborted by setting the :attr:`shouldStop` attribute to ``True``. - :class:`TestRunner` objects should respect this flag and return without + :class:`!TestRunner` objects should respect this flag and return without running any additional tests. For example, this feature is used by the :class:`TextTestRunner` class to stop the test framework when the user signals an interrupt from the - keyboard. Interactive tools which provide :class:`TestRunner` + keyboard. Interactive tools which provide :class:`!TestRunner` implementations can use this in a similar manner. The following methods of the :class:`TestResult` class are used to maintain @@ -2469,9 +2469,9 @@ Class and Module Fixtures ------------------------- Class and module level fixtures are implemented in :class:`TestSuite`. When -the test suite encounters a test from a new class then :meth:`tearDownClass` -from the previous class (if there is one) is called, followed by -:meth:`setUpClass` from the new class. +the test suite encounters a test from a new class then +:meth:`~TestCase.tearDownClass` from the previous class (if there is one) +is called, followed by :meth:`~TestCase.setUpClass` from the new class. Similarly if a test is from a different module from the previous test then ``tearDownModule`` from the previous module is run, followed by diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst index 016fcdc75da..674f646c633 100644 --- a/Doc/library/urllib.robotparser.rst +++ b/Doc/library/urllib.robotparser.rst @@ -19,7 +19,7 @@ This module provides a single class, :class:`RobotFileParser`, which answers questions about whether or not a particular user agent can fetch a URL on the -web site that published the :file:`robots.txt` file. For more details on the +website that published the :file:`robots.txt` file. For more details on the structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html. diff --git a/Doc/library/warnings.rst b/Doc/library/warnings.rst index 2f3cf6008f5..0de7a90bfcb 100644 --- a/Doc/library/warnings.rst +++ b/Doc/library/warnings.rst @@ -513,7 +513,7 @@ Available Functions .. versionchanged:: 3.6 Add the *source* parameter. - .. versionchanged:: next + .. versionchanged:: 3.15 If no module is passed, test the filter regular expression against module names created from the path, not only the path itself. diff --git a/Doc/library/wave.rst b/Doc/library/wave.rst index a3f5bfd5e2f..7ff2c97992c 100644 --- a/Doc/library/wave.rst +++ b/Doc/library/wave.rst @@ -25,8 +25,9 @@ The :mod:`wave` module defines the following function and exception: .. function:: open(file, mode=None) - If *file* is a string, open the file by that name, otherwise treat it as a - file-like object. *mode* can be: + If *file* is a string, a :term:`path-like object` or a + :term:`bytes-like object` open the file by that name, otherwise treat it as + a file-like object. *mode* can be: ``'rb'`` Read only mode. @@ -52,6 +53,10 @@ The :mod:`wave` module defines the following function and exception: .. versionchanged:: 3.4 Added support for unseekable files. + .. versionchanged:: 3.15 + Added support for :term:`path-like objects ` + and :term:`bytes-like objects `. + .. exception:: Error An error raised when something is impossible because it violates the WAV diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst index fd6abc70261..a2103d8fdd8 100644 --- a/Doc/library/webbrowser.rst +++ b/Doc/library/webbrowser.rst @@ -49,6 +49,11 @@ a new tab, with the browser being brought to the foreground. The use of the :mod:`webbrowser` module on iOS requires the :mod:`ctypes` module. If :mod:`ctypes` isn't available, calls to :func:`.open` will fail. +.. _webbrowser-cli: + +Command-line interface +---------------------- + .. program:: webbrowser The script :program:`webbrowser` can be used as a command-line interface for the @@ -232,7 +237,7 @@ Here are some simple examples:: .. _browser-controllers: -Browser Controller Objects +Browser controller objects -------------------------- Browser controllers provide the :attr:`~controller.name` attribute, diff --git a/Doc/library/winreg.rst b/Doc/library/winreg.rst index df8fb83a018..89def6e2afe 100644 --- a/Doc/library/winreg.rst +++ b/Doc/library/winreg.rst @@ -14,6 +14,8 @@ integer as the registry handle, a :ref:`handle object ` is used to ensure that the handles are closed correctly, even if the programmer neglects to explicitly close them. +.. availability:: Windows. + .. _exception-changed: .. versionchanged:: 3.3 @@ -816,6 +818,6 @@ integer handle, and also disconnect the Windows handle from the handle object. will automatically close *key* when control leaves the :keyword:`with` block. -.. versionchanged:: next +.. versionchanged:: 3.15 Handle objects are now compared by their underlying Windows handle value instead of object identity for equality comparisons. diff --git a/Doc/library/winsound.rst b/Doc/library/winsound.rst index 925984c3cdb..93c0c025982 100644 --- a/Doc/library/winsound.rst +++ b/Doc/library/winsound.rst @@ -13,6 +13,8 @@ The :mod:`winsound` module provides access to the basic sound-playing machinery provided by Windows platforms. It includes functions and several constants. +.. availability:: Windows. + .. function:: Beep(frequency, duration) diff --git a/Doc/library/xml.dom.pulldom.rst b/Doc/library/xml.dom.pulldom.rst index 8bceeecd463..a21cfaa4645 100644 --- a/Doc/library/xml.dom.pulldom.rst +++ b/Doc/library/xml.dom.pulldom.rst @@ -74,7 +74,7 @@ given point) or to make use of the :func:`DOMEventStream.expandNode` method and switch to DOM-related processing. -.. class:: PullDom(documentFactory=None) +.. class:: PullDOM(documentFactory=None) Subclass of :class:`xml.sax.handler.ContentHandler`. diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 881708a4dd7..e59759683a6 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -656,6 +656,10 @@ Functions .. versionchanged:: 3.13 Added the :meth:`!close` method. + .. versionchanged:: 3.15 + A :exc:`ResourceWarning` is now emitted if the iterator opened a file + and is not explicitly closed. + .. function:: parse(source, parser=None) diff --git a/Doc/library/xml.rst b/Doc/library/xml.rst index 3f745573474..acd8d399fe3 100644 --- a/Doc/library/xml.rst +++ b/Doc/library/xml.rst @@ -53,11 +53,22 @@ XML security An attacker can abuse XML features to carry out denial of service attacks, access local files, generate network connections to other machines, or -circumvent firewalls. +circumvent firewalls when attacker-controlled XML is being parsed, +in Python or elsewhere. -Expat versions lower than 2.6.0 may be vulnerable to "billion laughs", -"quadratic blowup" and "large tokens". Python may be vulnerable if it uses such -older versions of Expat as a system-provided library. +The built-in XML parsers of Python rely on the library `libexpat`_, commonly +called Expat, for parsing XML. + +By default, Expat itself does not access local files or create network +connections. + +Expat versions lower than 2.7.2 may be vulnerable to the "billion laughs", +"quadratic blowup" and "large tokens" vulnerabilities, or to disproportional +use of dynamic memory. +Python bundles a copy of Expat, and whether Python uses the bundled or a +system-wide Expat, depends on how the Python interpreter +:option:`has been configured <--with-system-expat>` in your environment. +Python may be vulnerable if it uses such older versions of Expat. Check :const:`!pyexpat.EXPAT_VERSION`. :mod:`xmlrpc` is **vulnerable** to the "decompression bomb" attack. @@ -90,5 +101,6 @@ large tokens be used to cause denial of service in the application parsing XML. The issue is known as :cve:`2023-52425`. +.. _libexpat: https://github.com/libexpat/libexpat .. _Billion Laughs: https://en.wikipedia.org/wiki/Billion_laughs .. _ZIP bomb: https://en.wikipedia.org/wiki/Zip_bomb diff --git a/Doc/library/xml.sax.handler.rst b/Doc/library/xml.sax.handler.rst index 38ca4507d81..f1af7253e43 100644 --- a/Doc/library/xml.sax.handler.rst +++ b/Doc/library/xml.sax.handler.rst @@ -96,6 +96,14 @@ for the feature and property names. .. data:: feature_external_ges + .. warning:: + + Enabling opens a vulnerability to + `external entity attacks `_ + if the parser is used with user-provided XML content. + Please reflect on your `threat model `_ + before enabling this feature. + | value: ``"http://xml.org/sax/features/external-general-entities"`` | true: Include all external general (text) entities. | false: Do not include external general entities. diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst index 5ee11d58c3d..7731f03d875 100644 --- a/Doc/library/xml.sax.utils.rst +++ b/Doc/library/xml.sax.utils.rst @@ -37,7 +37,7 @@ or as base classes. You can unescape other strings of data by passing a dictionary as the optional *entities* parameter. The keys and values must all be strings; each key will be - replaced with its corresponding value. ``'&'``, ``'<'``, and ``'>'`` + replaced with its corresponding value. ``'&'``, ``'<'``, and ``'>'`` are always unescaped, even if *entities* is provided. diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index a21c7d3e4e3..7e511237a6a 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -179,9 +179,9 @@ ServerProxy Objects A :class:`ServerProxy` instance has a method corresponding to each remote procedure call accepted by the XML-RPC server. Calling the method performs an RPC, dispatched by both name and argument signature (e.g. the same method name -can be overloaded with multiple argument signatures). The RPC finishes by -returning a value, which may be either returned data in a conformant type or a -:class:`Fault` or :class:`ProtocolError` object indicating an error. +can be overloaded with multiple argument signatures). The RPC finishes either +by returning data in a conformant type or by raising a :class:`Fault` or +:class:`ProtocolError` exception indicating an error. Servers that support the XML introspection API support some common methods grouped under the reserved :attr:`~ServerProxy.system` attribute: @@ -472,7 +472,7 @@ remote server into a single request [#]_. Create an object used to boxcar method calls. *server* is the eventual target of the call. Calls can be made to the result object, but they will immediately - return ``None``, and only store the call name and parameters in the + return ``None``, and only store the call name and arguments in the :class:`MultiCall` object. Calling the object itself causes all stored calls to be transmitted as a single ``system.multicall`` request. The result of this call is a :term:`generator`; iterating over this generator yields the individual diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index f6ec33640b6..ae4e25b13b9 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -16,13 +16,23 @@ provides tools to create, read, write, append, and list a ZIP file. Any advanced use of this module will require an understanding of the format, as defined in `PKZIP Application Note`_. -This module does not currently handle multi-disk ZIP files. +This module does not handle multipart ZIP files. It can handle ZIP files that use the ZIP64 extensions (that is ZIP files that are more than 4 GiB in size). It supports -decryption of encrypted files in ZIP archives, but it currently cannot +decryption of encrypted files in ZIP archives, but it cannot create an encrypted file. Decryption is extremely slow as it is implemented in native Python rather than C. +.. + The following paragraph should be similar to ../includes/optional-module.rst + +Handling compressed archives requires :term:`optional modules ` +such as :mod:`zlib`, :mod:`bz2`, :mod:`lzma`, and :mod:`compression.zstd`. +If any of them are missing from your copy of CPython, +look for documentation from your distributor (that is, +whoever provided Python to you). +If you are the distributor, see :ref:`optional-module-requirements`. + The module defines the following items: .. exception:: BadZipFile @@ -165,7 +175,7 @@ The module defines the following items: .. _zipfile-objects: -ZipFile Objects +ZipFile objects --------------- @@ -238,7 +248,7 @@ ZipFile Objects .. note:: *metadata_encoding* is an instance-wide setting for the ZipFile. - It is not currently possible to set this on a per-member basis. + It is not possible to set this on a per-member basis. This attribute is a workaround for legacy implementations which produce archives with names in the current locale encoding or code page (mostly @@ -561,7 +571,7 @@ The following data attributes are also available: .. _path-objects: -Path Objects +Path objects ------------ .. class:: Path(root, at='') @@ -697,7 +707,7 @@ changes. .. _pyzipfile-objects: -PyZipFile Objects +PyZipFile objects ----------------- The :class:`PyZipFile` constructor takes the same parameters as the @@ -774,7 +784,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the .. _zipinfo-objects: -ZipInfo Objects +ZipInfo objects --------------- Instances of the :class:`ZipInfo` class are returned by the :meth:`.getinfo` and @@ -944,7 +954,7 @@ Instances have the following methods and attributes: .. _zipfile-commandline: .. program:: zipfile -Command-Line Interface +Command-line interface ---------------------- The :mod:`zipfile` module provides a simple command-line interface to interact @@ -1019,7 +1029,7 @@ From file itself Decompression may fail due to incorrect password / CRC checksum / ZIP format or unsupported compression method / decryption. -File System limitations +File system limitations ~~~~~~~~~~~~~~~~~~~~~~~ Exceeding limitations on different file systems can cause decompression failed. diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index b961f7113d3..ce0a22b9456 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -8,9 +8,9 @@ -------------- For applications that require data compression, the functions in this module -allow compression and decompression, using the zlib library. The zlib library -has its own home page at https://www.zlib.net. zlib 1.2.2.1 is the minium -supported version. +allow compression and decompression, using the `zlib library `_. + +.. include:: ../includes/optional-module.rst zlib's functions have many options and often need to be used in a particular order. This documentation doesn't attempt to cover all of the permutations; diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 882b05e8731..5f79c6fe8f5 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -449,7 +449,7 @@ Sets These represent a mutable set. They are created by the built-in :func:`set` constructor and can be modified afterwards by several methods, such as - :meth:`add `. + :meth:`~set.add`. Frozen sets @@ -2630,8 +2630,8 @@ Notes on using *__slots__*: descriptor directly from the base class). This renders the meaning of the program undefined. In the future, a check may be added to prevent this. -* :exc:`TypeError` will be raised if nonempty *__slots__* are defined for a - class derived from a +* :exc:`TypeError` will be raised if *__slots__* other than *__dict__* and + *__weakref__* are defined for a class derived from a :c:member:`"variable-length" built-in type ` such as :class:`int`, :class:`bytes`, and :class:`tuple`. @@ -2656,6 +2656,10 @@ Notes on using *__slots__*: of the iterator's values. However, the *__slots__* attribute will be an empty iterator. +.. versionchanged:: next + Allowed defining the *__dict__* and *__weakref__* *__slots__* for any class. + + .. _class-customization: Customizing class creation diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index c655d6c52ec..165dfa69f88 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -174,7 +174,7 @@ Formally: .. grammar-snippet:: :group: python-grammar - strings: ( `STRING` | fstring)+ | tstring+ + strings: ( `STRING` | `fstring`)+ | `tstring`+ This feature is defined at the syntactical level, so it only works with literals. To concatenate string expressions at run time, the '+' operator may be used:: diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ea386706e8b..9322d8571f7 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -345,7 +345,15 @@ Whitespace between tokens Except at the beginning of a logical line or in string literals, the whitespace characters space, tab and formfeed can be used interchangeably to separate -tokens. Whitespace is needed between two tokens only if their concatenation +tokens: + +.. grammar-snippet:: + :group: python-grammar + + whitespace: ' ' | tab | formfeed + + +Whitespace is needed between two tokens only if their concatenation could otherwise be interpreted as a different token. For example, ``ab`` is one token, but ``a b`` is two tokens. However, ``+a`` and ``+ a`` both produce two tokens, ``+`` and ``a``, as ``+a`` is not a valid token. @@ -386,73 +394,29 @@ Names (identifiers and keywords) :data:`~token.NAME` tokens represent *identifiers*, *keywords*, and *soft keywords*. -Within the ASCII range (U+0001..U+007F), the valid characters for names -include the uppercase and lowercase letters (``A-Z`` and ``a-z``), -the underscore ``_`` and, except for the first character, the digits -``0`` through ``9``. +Names are composed of the following characters: + +* uppercase and lowercase letters (``A-Z`` and ``a-z``), +* the underscore (``_``), +* digits (``0`` through ``9``), which cannot appear as the first character, and +* non-ASCII characters. Valid names may only contain "letter-like" and + "digit-like" characters; see :ref:`lexical-names-nonascii` for details. Names must contain at least one character, but have no upper length limit. Case is significant. -Besides ``A-Z``, ``a-z``, ``_`` and ``0-9``, names can also use "letter-like" -and "number-like" characters from outside the ASCII range, as detailed below. - -All identifiers are converted into the `normalization form`_ NFKC while -parsing; comparison of identifiers is based on NFKC. - -Formally, the first character of a normalized identifier must belong to the -set ``id_start``, which is the union of: - -* Unicode category ```` - uppercase letters (includes ``A`` to ``Z``) -* Unicode category ```` - lowercase letters (includes ``a`` to ``z``) -* Unicode category ```` - titlecase letters -* Unicode category ```` - modifier letters -* Unicode category ```` - other letters -* Unicode category ```` - letter numbers -* {``"_"``} - the underscore -* ```` - an explicit set of characters in `PropList.txt`_ - to support backwards compatibility - -The remaining characters must belong to the set ``id_continue``, which is the -union of: - -* all characters in ``id_start`` -* Unicode category ```` - decimal numbers (includes ``0`` to ``9``) -* Unicode category ```` - connector punctuations -* Unicode category ```` - nonspacing marks -* Unicode category ```` - spacing combining marks -* ```` - another explicit set of characters in - `PropList.txt`_ to support backwards compatibility - -Unicode categories use the version of the Unicode Character Database as -included in the :mod:`unicodedata` module. - -These sets are based on the Unicode standard annex `UAX-31`_. -See also :pep:`3131` for further details. - -Even more formally, names are described by the following lexical definitions: +Formally, names are described by the following lexical definitions: .. grammar-snippet:: :group: python-grammar - NAME: `xid_start` `xid_continue`* - id_start: | | | | | | "_" | - id_continue: `id_start` | | | | | - xid_start: - xid_continue: - identifier: <`NAME`, except keywords> + NAME: `name_start` `name_continue`* + name_start: "a"..."z" | "A"..."Z" | "_" | + name_continue: name_start | "0"..."9" + identifier: <`NAME`, except keywords> -A non-normative listing of all valid identifier characters as defined by -Unicode is available in the `DerivedCoreProperties.txt`_ file in the Unicode -Character Database. - - -.. _UAX-31: https://www.unicode.org/reports/tr31/ -.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt -.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt -.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms +Note that not all names matched by this grammar are valid; see +:ref:`lexical-names-nonascii` for details. .. _keywords: @@ -555,6 +519,95 @@ characters: :ref:`atom-identifiers`. +.. _lexical-names-nonascii: + +Non-ASCII characters in names +----------------------------- + +Names that contain non-ASCII characters need additional normalization +and validation beyond the rules and grammar explained +:ref:`above `. +For example, ``ř_1``, ``蛇``, or ``साँप`` are valid names, but ``r〰2``, +``€``, or ``🐍`` are not. + +This section explains the exact rules. + +All names are converted into the `normalization form`_ NFKC while parsing. +This means that, for example, some typographic variants of characters are +converted to their "basic" form. For example, ``fiⁿₐˡᵢᶻₐᵗᵢᵒₙ`` normalizes to +``finalization``, so Python treats them as the same name:: + + >>> fiⁿₐˡᵢᶻₐᵗᵢᵒₙ = 3 + >>> finalization + 3 + +.. note:: + + Normalization is done at the lexical level only. + Run-time functions that take names as *strings* generally do not normalize + their arguments. + For example, the variable defined above is accessible at run time in the + :func:`globals` dictionary as ``globals()["finalization"]`` but not + ``globals()["fiⁿₐˡᵢᶻₐᵗᵢᵒₙ"]``. + +Similarly to how ASCII-only names must contain only letters, digits and +the underscore, and cannot start with a digit, a valid name must +start with a character in the "letter-like" set ``xid_start``, +and the remaining characters must be in the "letter- and digit-like" set +``xid_continue``. + +These sets based on the *XID_Start* and *XID_Continue* sets as defined by the +Unicode standard annex `UAX-31`_. +Python's ``xid_start`` additionally includes the underscore (``_``). +Note that Python does not necessarily conform to `UAX-31`_. + +A non-normative listing of characters in the *XID_Start* and *XID_Continue* +sets as defined by Unicode is available in the `DerivedCoreProperties.txt`_ +file in the Unicode Character Database. +For reference, the construction rules for the ``xid_*`` sets are given below. + +The set ``id_start`` is defined as the union of: + +* Unicode category ```` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ```` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ```` - titlecase letters +* Unicode category ```` - modifier letters +* Unicode category ```` - other letters +* Unicode category ```` - letter numbers +* {``"_"``} - the underscore +* ```` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility + +The set ``xid_start`` then closes this set under NFKC normalization, by +removing all characters whose normalization is not of the form +``id_start id_continue*``. + +The set ``id_continue`` is defined as the union of: + +* ``id_start`` (see above) +* Unicode category ```` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ```` - connector punctuations +* Unicode category ```` - nonspacing marks +* Unicode category ```` - spacing combining marks +* ```` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Again, ``xid_continue`` closes this set under NFKC normalization. + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/17.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/17.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms + +.. seealso:: + + * :pep:`3131` -- Supporting Non-ASCII Identifiers + * :pep:`672` -- Unicode-related Security Considerations for Python + + .. _literals: Literals @@ -987,124 +1040,59 @@ f-strings --------- .. versionadded:: 3.6 +.. versionchanged:: 3.7 + The :keyword:`await` and :keyword:`async for` can be used in expressions + within f-strings. +.. versionchanged:: 3.8 + Added the debug specifier (``=``) +.. versionchanged:: 3.12 + Many restrictions on expressions within f-strings have been removed. + Notably, nested strings, comments, and backslashes are now permitted. A :dfn:`formatted string literal` or :dfn:`f-string` is a string literal -that is prefixed with '``f``' or '``F``'. These strings may contain -replacement fields, which are expressions delimited by curly braces ``{}``. -While other string literals always have a constant value, formatted strings -are really expressions evaluated at run time. +that is prefixed with '``f``' or '``F``'. +Unlike other string literals, f-strings do not have a constant value. +They may contain *replacement fields* delimited by curly braces ``{}``. +Replacement fields contain expressions which are evaluated at run time. +For example:: -Escape sequences are decoded like in ordinary string literals (except when -a literal is also marked as a raw string). After decoding, the grammar -for the contents of the string is: + >>> who = 'nobody' + >>> nationality = 'Spanish' + >>> f'{who.title()} expects the {nationality} Inquisition!' + 'Nobody expects the Spanish Inquisition!' -.. productionlist:: python-grammar - f_string: (`literal_char` | "{{" | "}}" | `replacement_field`)* - replacement_field: "{" `f_expression` ["="] ["!" `conversion`] [":" `format_spec`] "}" - f_expression: (`conditional_expression` | "*" `or_expr`) - : ("," `conditional_expression` | "," "*" `or_expr`)* [","] - : | `yield_expression` - conversion: "s" | "r" | "a" - format_spec: (`literal_char` | `replacement_field`)* - literal_char: +Any doubled curly braces (``{{`` or ``}}``) outside replacement fields +are replaced with the corresponding single curly brace:: -The parts of the string outside curly braces are treated literally, -except that any doubled curly braces ``'{{'`` or ``'}}'`` are replaced -with the corresponding single curly brace. A single opening curly -bracket ``'{'`` marks a replacement field, which starts with a -Python expression. To display both the expression text and its value after -evaluation, (useful in debugging), an equal sign ``'='`` may be added after the -expression. A conversion field, introduced by an exclamation point ``'!'`` may -follow. A format specifier may also be appended, introduced by a colon ``':'``. -A replacement field ends with a closing curly bracket ``'}'``. + >>> print(f'{{...}}') + {...} + +Other characters outside replacement fields are treated like in ordinary +string literals. +This means that escape sequences are decoded (except when a literal is +also marked as a raw string), and newlines are possible in triple-quoted +f-strings:: + + >>> name = 'Galahad' + >>> favorite_color = 'blue' + >>> print(f'{name}:\t{favorite_color}') + Galahad: blue + >>> print(rf"C:\Users\{name}") + C:\Users\Galahad + >>> print(f'''Three shall be the number of the counting + ... and the number of the counting shall be three.''') + Three shall be the number of the counting + and the number of the counting shall be three. Expressions in formatted string literals are treated like regular -Python expressions surrounded by parentheses, with a few exceptions. -An empty expression is not allowed, and both :keyword:`lambda` and -assignment expressions ``:=`` must be surrounded by explicit parentheses. +Python expressions. Each expression is evaluated in the context where the formatted string literal -appears, in order from left to right. Replacement expressions can contain -newlines in both single-quoted and triple-quoted f-strings and they can contain -comments. Everything that comes after a ``#`` inside a replacement field -is a comment (even closing braces and quotes). In that case, replacement fields -must be closed in a different line. - -.. code-block:: text - - >>> f"abc{a # This is a comment }" - ... + 3}" - 'abc5' - -.. versionchanged:: 3.7 - Prior to Python 3.7, an :keyword:`await` expression and comprehensions - containing an :keyword:`async for` clause were illegal in the expressions - in formatted string literals due to a problem with the implementation. - -.. versionchanged:: 3.12 - Prior to Python 3.12, comments were not allowed inside f-string replacement - fields. - -When the equal sign ``'='`` is provided, the output will have the expression -text, the ``'='`` and the evaluated value. Spaces after the opening brace -``'{'``, within the expression and after the ``'='`` are all retained in the -output. By default, the ``'='`` causes the :func:`repr` of the expression to be -provided, unless there is a format specified. When a format is specified it -defaults to the :func:`str` of the expression unless a conversion ``'!r'`` is -declared. - -.. versionadded:: 3.8 - The equal sign ``'='``. - -If a conversion is specified, the result of evaluating the expression -is converted before formatting. Conversion ``'!s'`` calls :func:`str` on -the result, ``'!r'`` calls :func:`repr`, and ``'!a'`` calls :func:`ascii`. - -The result is then formatted using the :func:`format` protocol. The -format specifier is passed to the :meth:`~object.__format__` method of the -expression or conversion result. An empty string is passed when the -format specifier is omitted. The formatted result is then included in -the final value of the whole string. - -Top-level format specifiers may include nested replacement fields. These nested -fields may include their own conversion fields and :ref:`format specifiers -`, but may not include more deeply nested replacement fields. The -:ref:`format specifier mini-language ` is the same as that used by -the :meth:`str.format` method. - -Formatted string literals may be concatenated, but replacement fields -cannot be split across literals. - -Some examples of formatted string literals:: - - >>> name = "Fred" - >>> f"He said his name is {name!r}." - "He said his name is 'Fred'." - >>> f"He said his name is {repr(name)}." # repr() is equivalent to !r - "He said his name is 'Fred'." - >>> width = 10 - >>> precision = 4 - >>> value = decimal.Decimal("12.34567") - >>> f"result: {value:{width}.{precision}}" # nested fields - 'result: 12.35' - >>> today = datetime(year=2017, month=1, day=27) - >>> f"{today:%B %d, %Y}" # using date format specifier - 'January 27, 2017' - >>> f"{today=:%B %d, %Y}" # using date format specifier and debugging - 'today=January 27, 2017' - >>> number = 1024 - >>> f"{number:#0x}" # using integer format specifier - '0x400' - >>> foo = "bar" - >>> f"{ foo = }" # preserves whitespace - " foo = 'bar'" - >>> line = "The mill's closed" - >>> f"{line = }" - 'line = "The mill\'s closed"' - >>> f"{line = :20}" - "line = The mill's closed " - >>> f"{line = !r:20}" - 'line = "The mill\'s closed" ' +appears, in order from left to right. +An empty expression is not allowed, and both :keyword:`lambda` and +assignment expressions ``:=`` must be surrounded by explicit parentheses:: + >>> f'{(half := 1/2)}, {half * 42}' + '0.5, 21.0' Reusing the outer f-string quoting type inside a replacement field is permitted:: @@ -1113,10 +1101,6 @@ permitted:: >>> f"abc {a["x"]} def" 'abc 2 def' -.. versionchanged:: 3.12 - Prior to Python 3.12, reuse of the same quoting type of the outer f-string - inside a replacement field was not possible. - Backslashes are also allowed in replacement fields and are evaluated the same way as in any other context:: @@ -1127,23 +1111,84 @@ way as in any other context:: b c -.. versionchanged:: 3.12 - Prior to Python 3.12, backslashes were not permitted inside an f-string - replacement field. +It is possible to nest f-strings:: -Formatted string literals cannot be used as docstrings, even if they do not -include expressions. + >>> name = 'world' + >>> f'Repeated:{f' hello {name}' * 3}' + 'Repeated: hello world hello world hello world' -:: +Portable Python programs should not use more than 5 levels of nesting. + +.. impl-detail:: + + CPython does not limit nesting of f-strings. + +Replacement expressions can contain newlines in both single-quoted and +triple-quoted f-strings and they can contain comments. +Everything that comes after a ``#`` inside a replacement field +is a comment (even closing braces and quotes). +This means that replacement fields with comments must be closed in a +different line: + +.. code-block:: text + + >>> a = 2 + >>> f"abc{a # This comment }" continues until the end of the line + ... + 3}" + 'abc5' + +After the expression, replacement fields may optionally contain: + +* a *debug specifier* -- an equal sign (``=``), optionally surrounded by + whitespace on one or both sides; +* a *conversion specifier* -- ``!s``, ``!r`` or ``!a``; and/or +* a *format specifier* prefixed with a colon (``:``). + +See the :ref:`Standard Library section on f-strings ` +for details on how these fields are evaluated. + +As that section explains, *format specifiers* are passed as the second argument +to the :func:`format` function to format a replacement field value. +For example, they can be used to specify a field width and padding characters +using the :ref:`Format Specification Mini-Language `:: + + >>> number = 14.3 + >>> f'{number:20.7f}' + ' 14.3000000' + +Top-level format specifiers may include nested replacement fields:: + + >>> field_size = 20 + >>> precision = 7 + >>> f'{number:{field_size}.{precision}f}' + ' 14.3000000' + +These nested fields may include their own conversion fields and +:ref:`format specifiers `:: + + >>> number = 3 + >>> f'{number:{field_size}}' + ' 3' + >>> f'{number:{field_size:05}}' + '00000000000000000003' + +However, these nested fields may not include more deeply nested replacement +fields. + +Formatted string literals cannot be used as :term:`docstrings `, +even if they do not include expressions:: >>> def foo(): ... f"Not a docstring" ... - >>> foo.__doc__ is None - True + >>> print(foo.__doc__) + None -See also :pep:`498` for the proposal that added formatted string literals, -and :meth:`str.format`, which uses a related format string mechanism. +.. seealso:: + + * :pep:`498` -- Literal String Interpolation + * :pep:`701` -- Syntactic formalization of f-strings + * :meth:`str.format`, which uses a related format string mechanism. .. _t-strings: @@ -1156,36 +1201,99 @@ t-strings A :dfn:`template string literal` or :dfn:`t-string` is a string literal that is prefixed with '``t``' or '``T``'. -These strings follow the same syntax and evaluation rules as -:ref:`formatted string literals `, with the following differences: +These strings follow the same syntax rules as +:ref:`formatted string literals `. +For differences in evaluation rules, see the +:ref:`Standard Library section on t-strings ` -* Rather than evaluating to a ``str`` object, template string literals evaluate - to a :class:`string.templatelib.Template` object. -* The :func:`format` protocol is not used. - Instead, the format specifier and conversions (if any) are passed to - a new :class:`~string.templatelib.Interpolation` object that is created - for each evaluated expression. - It is up to code that processes the resulting :class:`~string.templatelib.Template` - object to decide how to handle format specifiers and conversions. +Formal grammar for f-strings +---------------------------- -* Format specifiers containing nested replacement fields are evaluated eagerly, - prior to being passed to the :class:`~string.templatelib.Interpolation` object. - For instance, an interpolation of the form ``{amount:.{precision}f}`` will - evaluate the inner expression ``{precision}`` to determine the value of the - ``format_spec`` attribute. - If ``precision`` were to be ``2``, the resulting format specifier - would be ``'.2f'``. +F-strings are handled partly by the :term:`lexical analyzer`, which produces the +tokens :py:data:`~token.FSTRING_START`, :py:data:`~token.FSTRING_MIDDLE` +and :py:data:`~token.FSTRING_END`, and partly by the parser, which handles +expressions in the replacement field. +The exact way the work is split is a CPython implementation detail. -* When the equals sign ``'='`` is provided in an interpolation expression, - the text of the expression is appended to the literal string that precedes - the relevant interpolation. - This includes the equals sign and any surrounding whitespace. - The :class:`!Interpolation` instance for the expression will be created as - normal, except that :attr:`~string.templatelib.Interpolation.conversion` will - be set to '``r``' (:func:`repr`) by default. - If an explicit conversion or format specifier are provided, - this will override the default behaviour. +Correspondingly, the f-string grammar is a mix of +:ref:`lexical and syntactic definitions `. + +Whitespace is significant in these situations: + +* There may be no whitespace in :py:data:`~token.FSTRING_START` (between + the prefix and quote). +* Whitespace in :py:data:`~token.FSTRING_MIDDLE` is part of the literal + string contents. +* In ``fstring_replacement_field``, if ``f_debug_specifier`` is present, + all whitespace after the opening brace until the ``f_debug_specifier``, + as well as whitespace immediatelly following ``f_debug_specifier``, + is retained as part of the expression. + + .. impl-detail:: + + The expression is not handled in the tokenization phase; it is + retrieved from the source code using locations of the ``{`` token + and the token after ``=``. + + +The ``FSTRING_MIDDLE`` definition uses +:ref:`negative lookaheads ` (``!``) +to indicate special characters (backslash, newline, ``{``, ``}``) and +sequences (``f_quote``). + +.. grammar-snippet:: + :group: python-grammar + + fstring: `FSTRING_START` `fstring_middle`* `FSTRING_END` + + FSTRING_START: `fstringprefix` ("'" | '"' | "'''" | '"""') + FSTRING_END: `f_quote` + fstringprefix: <("f" | "fr" | "rf"), case-insensitive> + f_debug_specifier: '=' + f_quote: + + fstring_middle: + | `fstring_replacement_field` + | `FSTRING_MIDDLE` + FSTRING_MIDDLE: + | (!"\" !`newline` !'{' !'}' !`f_quote`) `source_character` + | `stringescapeseq` + | "{{" + | "}}" + | + fstring_replacement_field: + | '{' `f_expression` [`f_debug_specifier`] [`fstring_conversion`] + [`fstring_full_format_spec`] '}' + fstring_conversion: + | "!" ("s" | "r" | "a") + fstring_full_format_spec: + | ':' `fstring_format_spec`* + fstring_format_spec: + | `FSTRING_MIDDLE` + | `fstring_replacement_field` + f_expression: + | ','.(`conditional_expression` | "*" `or_expr`)+ [","] + | `yield_expression` + +.. note:: + + In the above grammar snippet, the ``f_quote`` and ``FSTRING_MIDDLE`` rules + are context-sensitive -- they depend on the contents of ``FSTRING_START`` + of the nearest enclosing ``fstring``. + + Constructing a more traditional formal grammar from this template is left + as an exercise for the reader. + +The grammar for t-strings is identical to the one for f-strings, with *t* +instead of *f* at the beginning of rule and token names and in the prefix. + +.. grammar-snippet:: + :group: python-grammar + + tstring: TSTRING_START tstring_middle* TSTRING_END + + .. _numbers: diff --git a/Doc/requirements.txt b/Doc/requirements.txt index d5f7b473c3a..716772b7f28 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -7,7 +7,7 @@ # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. # Keep this version in sync with ``Doc/conf.py``. -sphinx~=8.2.0 +sphinx~=9.0.0 blurb diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 6fee1c192c3..c41c70f0ed3 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -6,9 +6,7 @@ Doc/c-api/descriptor.rst Doc/c-api/float.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/module.rst Doc/c-api/stable.rst -Doc/c-api/type.rst Doc/c-api/typeobj.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst @@ -30,14 +28,12 @@ Doc/library/pyexpat.rst Doc/library/select.rst Doc/library/socket.rst Doc/library/ssl.rst -Doc/library/stdtypes.rst Doc/library/termios.rst Doc/library/test.rst Doc/library/tkinter.rst Doc/library/tkinter.scrolledtext.rst Doc/library/tkinter.ttk.rst Doc/library/unittest.mock.rst -Doc/library/unittest.rst Doc/library/urllib.parse.rst Doc/library/urllib.request.rst Doc/library/wsgiref.rst diff --git a/Doc/tools/extensions/c_annotations.py b/Doc/tools/extensions/c_annotations.py index 089614a1f6c..e04a5f144c4 100644 --- a/Doc/tools/extensions/c_annotations.py +++ b/Doc/tools/extensions/c_annotations.py @@ -154,7 +154,10 @@ def add_annotations(app: Sphinx, doctree: nodes.document) -> None: node.insert(0, annotation) -def _stable_abi_annotation(record: StableABIEntry) -> nodes.emphasis: +def _stable_abi_annotation( + record: StableABIEntry, + is_corresponding_slot: bool = False, +) -> nodes.emphasis: """Create the Stable ABI annotation. These have two forms: @@ -168,9 +171,28 @@ def _stable_abi_annotation(record: StableABIEntry) -> nodes.emphasis: ... all of which can have "since version X.Y" appended. """ stable_added = record.added - message = sphinx_gettext("Part of the") - message = message.center(len(message) + 2) - emph_node = nodes.emphasis(message, message, classes=["stableabi"]) + emph_node = nodes.emphasis('', '', classes=["stableabi"]) + if is_corresponding_slot: + # See "Type slot annotations" in add_annotations + ref_node = addnodes.pending_xref( + "slot ID", + refdomain="c", + reftarget="PyType_Slot", + reftype="type", + refexplicit="True", + ) + ref_node += nodes.Text(sphinx_gettext("slot ID")) + + message = sphinx_gettext("The corresponding") + emph_node += nodes.Text(" " + message + " ") + emph_node += ref_node + emph_node += nodes.Text(" ") + emph_node += nodes.literal(record.name, record.name) + message = sphinx_gettext("is part of the") + emph_node += nodes.Text(" " + message + " ") + else: + message = sphinx_gettext("Part of the") + emph_node += nodes.Text(" " + message + " ") ref_node = addnodes.pending_xref( "Stable ABI", refdomain="std", @@ -265,6 +287,51 @@ def run(self) -> list[nodes.Node]: return [node] +class CorrespondingTypeSlot(SphinxDirective): + """Type slot annotations + + Docs for these are with the corresponding field, for example, + "Py_tp_repr" is documented under "PyTypeObject.tp_repr", with + only a stable ABI note mentioning "Py_tp_repr" (and linking to + docs on how this works). + + If there is no corresponding field, these should be documented as normal + macros. + """ + + has_content = False + + required_arguments = 1 + optional_arguments = 0 + + def run(self) -> list[nodes.Node]: + name = self.arguments[0] + state = self.env.domaindata["c_annotations"] + stable_abi_data = state["stable_abi_data"] + + try: + record = stable_abi_data[name] + except LookupError as err: + raise LookupError( + f"{name} is not part of stable ABI. " + + "Document it as `c:macro::` rather than " + + "`corresponding-type-slot::`." + ) from err + + annotation = _stable_abi_annotation(record, is_corresponding_slot=True) + + node = nodes.paragraph() + content = [ + ".. c:namespace:: NULL", + "", + ".. c:macro:: " + name, + " :no-typesetting:", + ] + self.state.nested_parse(StringList(content), 0, node) + node.insert(0, annotation) + return [node] + + def init_annotations(app: Sphinx) -> None: # Using domaindata is a bit hack-ish, # but allows storing state without a global variable or closure. @@ -281,6 +348,7 @@ def setup(app: Sphinx) -> ExtensionMetadata: app.add_config_value("refcount_file", "", "env", types={str}) app.add_config_value("stable_abi_file", "", "env", types={str}) app.add_directive("limited-api-list", LimitedAPIList) + app.add_directive("corresponding-type-slot", CorrespondingTypeSlot) app.connect("builder-inited", init_annotations) app.connect("doctree-read", add_annotations) diff --git a/Doc/tools/extensions/glossary_search.py b/Doc/tools/extensions/glossary_search.py index 502b6cd95bc..bcda4b7b435 100644 --- a/Doc/tools/extensions/glossary_search.py +++ b/Doc/tools/extensions/glossary_search.py @@ -38,7 +38,7 @@ def process_glossary_nodes( rendered = app.builder.render_partial(definition) terms[term.lower()] = { 'title': term, - 'body': rendered['html_body'], + 'body': rendered['fragment'], } diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index f5451adb37b..f9bf273e762 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -24,14 +24,6 @@ # Used in conf.py and updated here by python/release-tools/run_release.py SOURCE_URI = 'https://github.com/python/cpython/tree/main/%s' -# monkey-patch reST parser to disable alphabetic and roman enumerated lists -from docutils.parsers.rst.states import Body -Body.enum.converters['loweralpha'] = \ - Body.enum.converters['upperalpha'] = \ - Body.enum.converters['lowerroman'] = \ - Body.enum.converters['upperroman'] = lambda x: None - - class PyAwaitableMixin(object): def handle_signature(self, sig, signode): ret = super(PyAwaitableMixin, self).handle_signature(sig, signode) diff --git a/Doc/tools/templates/download.html b/Doc/tools/templates/download.html index f914ad86211..c78c650b1cb 100644 --- a/Doc/tools/templates/download.html +++ b/Doc/tools/templates/download.html @@ -31,8 +31,7 @@

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

{% if last_updated %}

{% trans %}Last updated on: {{ last_updated }}.{% endtrans %}

{% endif %} -

{% trans %}To download an archive containing all the documents for this version of -Python in one of various formats, follow one of links in this table.{% endtrans %}

+

{% trans %}Download an archive containing all the documentation for this version of Python:{% endtrans %}

@@ -62,8 +61,6 @@

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

-

{% trans %}These archives contain all the content in the documentation.{% endtrans %}

-

{% trans %} We no longer provide pre-built PDFs of the documentation. To build a PDF archive, follow the instructions in the @@ -75,7 +72,6 @@

{% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

See the directory listing for file sizes.{% endtrans %}

-

{% trans %}Problems{% endtrans %}

{% set bugs = pathto('bugs') %}

{% trans bugs = bugs %}Open an issue diff --git a/Doc/tools/templates/dummy.html b/Doc/tools/templates/dummy.html index 0fdbe2a5801..75f6607d8f3 100644 --- a/Doc/tools/templates/dummy.html +++ b/Doc/tools/templates/dummy.html @@ -27,8 +27,8 @@ In extensions/changes.py: -{% trans %}Deprecated since version {deprecated}, will be removed in version {removed}{% endtrans %} -{% trans %}Deprecated since version {deprecated}, removed in version {removed}{% endtrans %} +{% trans %}Deprecated since version %s, will be removed in version %s{% endtrans %} +{% trans %}Deprecated since version %s, removed in version %s{% endtrans %} In docsbuild-scripts, when rewriting indexsidebar.html with actual versions: diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 1332c53f396..7e02e74177c 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -12,9 +12,8 @@ and adds some new things as well. More on Lists ============= -The list data type has some more methods. Here are all of the methods of list -objects: - +The :ref:`list ` data type has some more methods. Here are all +of the methods of list objects: .. method:: list.append(x) :noindex: @@ -445,10 +444,11 @@ packing and sequence unpacking. Sets ==== -Python also includes a data type for *sets*. A set is an unordered collection -with no duplicate elements. Basic uses include membership testing and -eliminating duplicate entries. Set objects also support mathematical operations -like union, intersection, difference, and symmetric difference. +Python also includes a data type for :ref:`sets `. A set is +an unordered collection with no duplicate elements. Basic uses include +membership testing and eliminating duplicate entries. Set objects also +support mathematical operations like union, intersection, difference, and +symmetric difference. Curly braces or the :func:`set` function can be used to create sets. Note: to create an empty set you have to use ``set()``, not ``{}``; the latter creates an diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst index d0bf77dc40d..20fe161be4a 100644 --- a/Doc/tutorial/index.rst +++ b/Doc/tutorial/index.rst @@ -15,7 +15,7 @@ together with its interpreted nature, make it an ideal language for scripting and rapid application development in many areas on most platforms. The Python interpreter and the extensive standard library are freely available -in source or binary form for all major platforms from the Python web site, +in source or binary form for all major platforms from the Python website, https://www.python.org/, and may be freely distributed. The same site also contains distributions of and pointers to many free third party Python modules, programs and tools, and additional documentation. diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index fb491149793..deabac52530 100644 --- a/Doc/tutorial/introduction.rst +++ b/Doc/tutorial/introduction.rst @@ -49,7 +49,7 @@ primary prompt, ``>>>``. (It shouldn't take long.) Numbers ------- -The interpreter acts as a simple calculator: you can type an expression at it +The interpreter acts as a simple calculator: you can type an expression into it and it will write the value. Expression syntax is straightforward: the operators ``+``, ``-``, ``*`` and ``/`` can be used to perform arithmetic; parentheses (``()``) can be used for grouping. diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index d83ecca270b..342c1a00193 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -183,13 +183,13 @@ protocols. Two of the simplest are :mod:`urllib.request` for retrieving data from URLs and :mod:`smtplib` for sending mail:: >>> from urllib.request import urlopen - >>> with urlopen('http://worldtimeapi.org/api/timezone/etc/UTC.txt') as response: + >>> with urlopen('https://docs.python.org/3/') as response: ... for line in response: ... line = line.decode() # Convert bytes to a str - ... if line.startswith('datetime'): + ... if 'updated' in line: ... print(line.rstrip()) # Remove trailing newline ... - datetime: 2022-01-01T01:36:47.689215+00:00 + Last updated on Nov 11, 2025 (20:11 UTC). >>> import smtplib >>> server = smtplib.SMTP('localhost') @@ -335,7 +335,7 @@ sophisticated and robust capabilities of its larger packages. For example: names, no direct knowledge or handling of XML is needed. * The :mod:`email` package is a library for managing email messages, including - MIME and other :rfc:`2822`-based message documents. Unlike :mod:`smtplib` and + MIME and other :rfc:`5322`-based message documents. Unlike :mod:`smtplib` and :mod:`poplib` which actually send and receive messages, the email package has a complete toolset for building or decoding complex message structures (including attachments) and for implementing internet encoding and header diff --git a/Doc/tutorial/whatnow.rst b/Doc/tutorial/whatnow.rst index dbe2d7fc099..359cf80a7b2 100644 --- a/Doc/tutorial/whatnow.rst +++ b/Doc/tutorial/whatnow.rst @@ -30,7 +30,7 @@ the set are: More Python resources: -* https://www.python.org: The major Python web site. It contains code, +* https://www.python.org: The major Python website. It contains code, documentation, and pointers to Python-related pages around the web. * https://docs.python.org: Fast access to Python's documentation. diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 1f773a3a547..dff0fe036ea 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -4,10 +4,13 @@ Configure Python .. highlight:: sh + +.. _build-requirements: + Build Requirements ================== -Features and minimum versions required to build CPython: +To build CPython, you will need: * A `C11 `_ compiler. `Optional C11 features @@ -22,85 +25,138 @@ Features and minimum versions required to build CPython: * Support for threads. -To build optional modules: - -* `libbz2 `_ for the :mod:`bz2` module. - -* `libb2 `_ (:ref:`BLAKE2 `) - for the :mod:`hashlib` module. - -* `libffi `_ 3.3.0 is the recommended - minimum version for the :mod:`ctypes` module. - -* ``liblzma`` for the :mod:`lzma` module. - -* `libmpdec `_ 2.5.0 - for the :mod:`decimal` module. - -* ``libncurses`` or ``libncursesw`` for the :mod:`curses` module. - -* ``libpanel`` or ``libpanelw`` for the :mod:`curses.panel` module. - -* `libreadline `_ or - `libedit `_ - for the :mod:`readline` module. - -* `libuuid `_ for the :mod:`uuid` module. - -* `OpenSSL `_ 1.1.1 is the minimum version and - OpenSSL 3.0.18 is the recommended minimum version for the - :mod:`ssl` and :mod:`hashlib` extension modules. - -* `SQLite `_ 3.15.2 for the :mod:`sqlite3` extension module. - -* `Tcl/Tk `_ 8.5.12 for the :mod:`tkinter` module. - -* `zlib `_ 1.2.2.1 is the minimum version for the - :mod:`zlib` module. - -* `zstd `_ 1.4.5 is the minimum version for - the :mod:`compression.zstd` module. - -For a full list of dependencies required to build all modules and how to install -them, see the -`devguide `_. - -* Autoconf 2.72 and aclocal 1.16.5 are required to regenerate the - :file:`configure` script. - -.. versionchanged:: 3.1 - Tcl/Tk version 8.3.1 is now required. - .. versionchanged:: 3.5 On Windows, Visual Studio 2015 or later is now required. - Tcl/Tk version 8.4 is now required. .. versionchanged:: 3.6 - Selected C99 features are now required, like ```` and ``static - inline`` functions. + Selected C99 features, like ```` and ``static inline`` functions, + are now required. .. versionchanged:: 3.7 - Thread support and OpenSSL 1.0.2 are now required. - -.. versionchanged:: 3.10 - OpenSSL 1.1.1 is now required. - Require SQLite 3.7.15. + Thread support is now required. .. versionchanged:: 3.11 C11 compiler, IEEE 754 and NaN support are now required. On Windows, Visual Studio 2017 or later is required. - Tcl/Tk version 8.5.12 is now required for the :mod:`tkinter` module. - -.. versionchanged:: 3.13 - Autoconf 2.71, aclocal 1.16.5 and SQLite 3.15.2 are now required. - -.. versionchanged:: 3.14 - Autoconf 2.72 is now required. See also :pep:`7` "Style Guide for C Code" and :pep:`11` "CPython platform support". +.. _optional-module-requirements: + +Requirements for optional modules +--------------------------------- + +Some :term:`optional modules ` of the standard library +require third-party libraries installed for development +(for example, header files must be available). + +Missing requirements are reported in the ``configure`` output. +Modules that are missing due to missing dependencies are listed near the end +of the ``make`` output, +sometimes using an internal name, for example, ``_ctypes`` for :mod:`ctypes` +module. + +If you distribute a CPython interpreter without optional modules, +it's best practice to advise users, who generally expect that +standard library modules are available. + +Dependencies to build optional modules are: + +.. list-table:: + :header-rows: 1 + :align: left + + * - Dependency + - Minimum version + - Python module + * - `libbz2 `_ + - + - :mod:`bz2` + * - `libffi `_ + - 3.3.0 recommended + - :mod:`ctypes` + * - `liblzma `_ + - + - :mod:`lzma` + * - `libmpdec `_ + - 2.5.0 + - :mod:`decimal` [1]_ + * - `libreadline `_ or + `libedit `_ [2]_ + - + - :mod:`readline` + * - `libuuid `_ + - + - ``_uuid`` [3]_ + * - `ncurses `_ [4]_ + - + - :mod:`curses` + * - `OpenSSL `_ + - | 3.0.18 recommended + | (1.1.1 minimum) + - :mod:`ssl`, :mod:`hashlib` [5]_ + * - `SQLite `_ + - 3.15.2 + - :mod:`sqlite3` + * - `Tcl/Tk `_ + - 8.5.12 + - :mod:`tkinter`, :ref:`IDLE `, :mod:`turtle` + * - `zlib `_ + - 1.2.2.1 + - :mod:`zlib`, :mod:`gzip`, :mod:`ensurepip` + * - `zstd `_ + - 1.4.5 + - :mod:`compression.zstd` + +.. [1] If *libmpdec* is not available, the :mod:`decimal` module will use + a pure-Python implementation. + See :option:`--with-system-libmpdec` for details. +.. [2] See :option:`--with-readline` for choosing the backend for the + :mod:`readline` module. +.. [3] The :mod:`uuid` module uses ``_uuid`` to generate "safe" UUIDs. + See the module documentation for details. +.. [4] The :mod:`curses` module requires the ``libncurses`` or ``libncursesw`` + library. + The :mod:`curses.panel` module additionally requires the ``libpanel`` or + ``libpanelw`` library. +.. [5] If OpenSSL is not available, the :mod:`hashlib` module will use + bundled implementations of several hash functions. + See :option:`--with-builtin-hashlib-hashes` for *forcing* usage of OpenSSL. + +Note that the table does not include all optional modules; in particular, +platform-specific modules like :mod:`winreg` are not listed here. + +.. seealso:: + + * The `devguide `_ + includes a full list of dependencies required to build all modules and + instructions on how to install them on common platforms. + * :option:`--with-system-expat` allows building with an external + `libexpat `_ library. + * :ref:`configure-options-for-dependencies` + +.. versionchanged:: 3.1 + Tcl/Tk version 8.3.1 is now required for :mod:`tkinter`. + +.. versionchanged:: 3.5 + Tcl/Tk version 8.4 is now required for :mod:`tkinter`. + +.. versionchanged:: 3.7 + OpenSSL 1.0.2 is now required for :mod:`hashlib` and :mod:`ssl`. + +.. versionchanged:: 3.10 + OpenSSL 1.1.1 is now required for :mod:`hashlib` and :mod:`ssl`. + SQLite 3.7.15 is now required for :mod:`sqlite3`. + +.. versionchanged:: 3.11 + Tcl/Tk version 8.5.12 is now required for :mod:`tkinter`. + +.. versionchanged:: 3.13 + SQLite 3.15.2 is now required for :mod:`sqlite3`. + + Generated files =============== @@ -127,8 +183,19 @@ The container is optional, the following command can be run locally:: autoreconf -ivf -Werror -The generated files can change depending on the exact ``autoconf-archive``, -``aclocal`` and ``pkg-config`` versions. +The generated files can change depending on the exact versions of the +tools used. +The container that CPython uses has +`Autoconf `_ 2.72, +``aclocal`` from `Automake `_ 1.16.5, +and `pkg-config `_ 1.8.1. + +.. versionchanged:: 3.13 + Autoconf 2.71 and aclocal 1.16.5 and are now used to regenerate + :file:`configure`. + +.. versionchanged:: 3.14 + Autoconf 2.72 is now used to regenerate :file:`configure`. .. _configure-options: @@ -255,6 +322,30 @@ General Options .. versionadded:: 3.11 +.. option:: --with-missing-stdlib-config=FILE + + Path to a `JSON `_ configuration file + containing custom error messages for missing :term:`standard library` modules. + + This option is intended for Python distributors who wish to provide + distribution-specific guidance when users encounter standard library + modules that are missing or packaged separately. + + The JSON file should map missing module names to custom error message strings. + For example, if your distribution packages :mod:`tkinter` and + :mod:`_tkinter` separately and excludes :mod:`!_gdbm` for legal reasons, + the configuration could contain: + + .. code-block:: json + + { + "_gdbm": "The '_gdbm' module is not available in this distribution", + "tkinter": "Install the python-tk package to use tkinter", + "_tkinter": "Install the python-tk package to use tkinter", + } + + .. versionadded:: next + .. option:: --enable-pystats Turn on internal Python performance statistics gathering. @@ -409,6 +500,8 @@ Linker options Name for machine-dependent library files. +.. _configure-options-for-dependencies: + Options for third-party dependencies ------------------------------------ @@ -431,12 +524,6 @@ Options for third-party dependencies C compiler and linker flags for ``gdbm``. -.. option:: LIBB2_CFLAGS -.. option:: LIBB2_LIBS - - C compiler and linker flags for ``libb2`` (:ref:`BLAKE2 `), - used by :mod:`hashlib` module, overriding ``pkg-config``. - .. option:: LIBEDIT_CFLAGS .. option:: LIBEDIT_LIBS @@ -902,6 +989,13 @@ Libraries options .. versionchanged:: 3.13 Default to using the installed ``mpdecimal`` library. + .. versionchanged:: 3.15 + + A bundled copy of the library will no longer be selected + implicitly if an installed ``mpdecimal`` library is not found. + In Python 3.15 only, it can still be selected explicitly using + ``--with-system-libmpdec=no`` or ``--without-system-libmpdec``. + .. deprecated-removed:: 3.13 3.16 A copy of the ``mpdecimal`` library sources will no longer be distributed with Python 3.16. diff --git a/Doc/using/unix.rst b/Doc/using/unix.rst index 9ec4e341932..a9950ef7525 100644 --- a/Doc/using/unix.rst +++ b/Doc/using/unix.rst @@ -84,11 +84,17 @@ On FreeBSD and OpenBSD Building Python =============== +.. seealso:: + + If you want to contribute to CPython, refer to the + `devguide `_, + which includes build instructions and other tips on setting up environment. + If you want to compile CPython yourself, first thing you should do is get the `source `_. You can download either the -latest release's source or just grab a fresh `clone -`_. (If you want -to contribute patches, you will need a clone.) +latest release's source or grab a fresh `clone +`_. +You will also need to install the :ref:`build requirements `. The build process consists of the usual commands:: diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 0b98cfb8d27..ee182519199 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -4,6 +4,8 @@ .. _Microsoft Store app: https://apps.microsoft.com/detail/9NQ7512CXL7T +.. _legacy launcher: https://www.python.org/ftp/python/3.14.0/win32/launcher.msi + .. _using-on-windows: ************************* @@ -457,6 +459,25 @@ customization. - Specify the default format used by the ``py list`` command. By default, ``table``. + * - ``install_dir`` + - (none) + - Specify the root directory that runtimes will be installed into. + If you change this setting, previously installed runtimes will not be + usable unless you move them to the new location. + + * - ``global_dir`` + - (none) + - Specify the directory where global commands (such as ``python3.14.exe``) + are stored. + This directory should be added to your :envvar:`PATH` to make the + commands available from your terminal. + + * - ``download_dir`` + - (none) + - Specify the directory where downloaded files are stored. + This directory is a temporary cache, and can be cleaned up from time to + time. + Dotted names should be nested inside JSON objects, for example, ``list.format`` would be specified as ``{"list": {"format": "table"}}``. @@ -524,12 +545,9 @@ configuration option. The behaviour of shebangs in the Python install manager is subtly different from the previous ``py.exe`` launcher, and the old configuration options no longer apply. If you are specifically reliant on the old behaviour or - configuration, we recommend keeping the legacy launcher. It may be - `downloaded independently `_ - and installed on its own. The legacy launcher's ``py`` command will override - PyManager's one, and you will need to use ``pymanager`` commands for - installing and uninstalling. - + configuration, we recommend installing the `legacy launcher`_. The legacy + launcher's ``py`` command will override PyManager's one by default, and you + will need to use ``pymanager`` commands for installing and uninstalling. .. _Add-AppxPackage: https://learn.microsoft.com/powershell/module/appx/add-appxpackage @@ -840,6 +858,17 @@ default). These scripts are separated for each runtime, and so you may need to add multiple paths. + * - Typing ``script-name.py`` in the terminal opens in a new window. + - This is a known limitation of the operating system. Either specify ``py`` + before the script name, create a batch file containing ``@py "%~dpn0.py" %*`` + with the same name as the script, or install the `legacy launcher`_ + and select it as the association for scripts. + + * - Drag-dropping files onto a script doesn't work + - This is a known limitation of the operating system. It is supported with + the `legacy launcher`_, or with the Python install manager when installed + from the MSI. + .. _windows-embeddable: diff --git a/Doc/whatsnew/2.3.rst b/Doc/whatsnew/2.3.rst index b7e4e73f4ce..f43692b3dce 100644 --- a/Doc/whatsnew/2.3.rst +++ b/Doc/whatsnew/2.3.rst @@ -66,7 +66,7 @@ Here's a simple example:: The union and intersection of sets can be computed with the :meth:`~frozenset.union` and :meth:`~frozenset.intersection` methods; an alternative notation uses the bitwise operators ``&`` and ``|``. Mutable sets also have in-place versions of these methods, -:meth:`!union_update` and :meth:`~frozenset.intersection_update`. :: +:meth:`!union_update` and :meth:`~set.intersection_update`. :: >>> S1 = sets.Set([1,2,3]) >>> S2 = sets.Set([4,5,6]) @@ -87,7 +87,7 @@ It's also possible to take the symmetric difference of two sets. This is the set of all elements in the union that aren't in the intersection. Another way of putting it is that the symmetric difference contains all elements that are in exactly one set. Again, there's an alternative notation (``^``), and an -in-place version with the ungainly name :meth:`~frozenset.symmetric_difference_update`. :: +in-place version with the ungainly name :meth:`~set.symmetric_difference_update`. :: >>> S1 = sets.Set([1,2,3,4]) >>> S2 = sets.Set([3,4,5,6]) diff --git a/Doc/whatsnew/2.5.rst b/Doc/whatsnew/2.5.rst index 3430ac8668e..e195d9d462d 100644 --- a/Doc/whatsnew/2.5.rst +++ b/Doc/whatsnew/2.5.rst @@ -2169,9 +2169,9 @@ Changes to Python's build process and to the C API include: * Two new macros can be used to indicate C functions that are local to the current file so that a faster calling convention can be used. - ``Py_LOCAL(type)`` declares the function as returning a value of the + :c:macro:`Py_LOCAL` declares the function as returning a value of the specified *type* and uses a fast-calling qualifier. - ``Py_LOCAL_INLINE(type)`` does the same thing and also requests the + :c:macro:`Py_LOCAL_INLINE` does the same thing and also requests the function be inlined. If macro :c:macro:`!PY_LOCAL_AGGRESSIVE` is defined before :file:`python.h` is included, a set of more aggressive optimizations are enabled for the module; you should benchmark the results to find out if these diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 09feb185b82..7296296d144 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -2181,14 +2181,14 @@ Changes to Python's build process and to the C API include: discussed in :issue:`5753`, and fixed by Antoine Pitrou. * New macros: the Python header files now define the following macros: - :c:macro:`!Py_ISALNUM`, - :c:macro:`!Py_ISALPHA`, - :c:macro:`!Py_ISDIGIT`, - :c:macro:`!Py_ISLOWER`, - :c:macro:`!Py_ISSPACE`, - :c:macro:`!Py_ISUPPER`, - :c:macro:`!Py_ISXDIGIT`, - :c:macro:`!Py_TOLOWER`, and :c:macro:`!Py_TOUPPER`. + :c:macro:`Py_ISALNUM`, + :c:macro:`Py_ISALPHA`, + :c:macro:`Py_ISDIGIT`, + :c:macro:`Py_ISLOWER`, + :c:macro:`Py_ISSPACE`, + :c:macro:`Py_ISUPPER`, + :c:macro:`Py_ISXDIGIT`, + :c:macro:`Py_TOLOWER`, and :c:macro:`Py_TOUPPER`. All of these functions are analogous to the C standard macros for classifying characters, but ignore the current locale setting, because in @@ -2196,8 +2196,6 @@ Changes to Python's build process and to the C API include: locale-independent way. (Added by Eric Smith; :issue:`5793`.) - .. XXX these macros don't seem to be described in the c-api docs. - * Removed function: :c:func:`!PyEval_CallObject` is now only available as a macro. A function version was being kept around to preserve ABI linking compatibility, but that was in 1997; it can certainly be diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 1a2fbda0c4c..9459b73bcb5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -3045,7 +3045,7 @@ Deprecated C APIs ----------------- * The :c:macro:`!Py_HUGE_VAL` macro is now :term:`soft deprecated`. - Use :c:macro:`!Py_INFINITY` instead. + Use :c:macro:`!INFINITY` instead. (Contributed by Sergey B Kirpichev in :gh:`120026`.) * The :c:macro:`!Py_IS_NAN`, :c:macro:`!Py_IS_INFINITY`, diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index 5a10c036c35..d94a97dd3b8 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -400,6 +400,97 @@ Other language changes not only integers or floats, although this does not improve precision. (Contributed by Serhiy Storchaka in :gh:`67795`.) +.. _whatsnew315-bytearray-take-bytes: + +* Added :meth:`bytearray.take_bytes(n=None, /) ` to take + bytes out of a :class:`bytearray` without copying. This enables optimizing code + which must return :class:`bytes` after working with a mutable buffer of bytes + such as data buffering, network protocol parsing, encoding, decoding, + and compression. Common code patterns which can be optimized with + :func:`~bytearray.take_bytes` are listed below. + + .. list-table:: Suggested optimizing refactors + :header-rows: 1 + + * - Description + - Old + - New + + * - Return :class:`bytes` after working with :class:`bytearray` + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return bytes(buffer) + + - .. code:: python + + def read() -> bytes: + buffer = bytearray(1024) + ... + return buffer.take_bytes() + + * - Empty a buffer getting the bytes + - .. code:: python + + buffer = bytearray(1024) + ... + data = bytes(buffer) + buffer.clear() + + - .. code:: python + + buffer = bytearray(1024) + ... + data = buffer.take_bytes() + + * - Split a buffer at a specific separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n + 1]) + del buffer[:n + 1] + assert data == b'abc' + assert buffer == bytearray(b'def') + + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = buffer.take_bytes(n + 1) + + * - Split a buffer at a specific separator; discard after the separator + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + data = bytes(buffer[:n]) + buffer.clear() + assert data == b'abc' + assert len(buffer) == 0 + + - .. code:: python + + buffer = bytearray(b'abc\ndef') + n = buffer.find(b'\n') + buffer.resize(n) + data = buffer.take_bytes() + + (Contributed by Cody Maloney in :gh:`139871`.) + +* Many functions related to compiling or parsing Python code, such as + :func:`compile`, :func:`ast.parse`, :func:`symtable.symtable`, + and :func:`importlib.abc.InspectLoader.source_to_code`, now allow the module + name to be passed. It is needed to unambiguously :ref:`filter ` + syntax warnings by module name. + (Contributed by Serhiy Storchaka in :gh:`135801`.) + +* Allowed defining the *__dict__* and *__weakref__* :ref:`__slots__ ` + for any class. + (Contributed by Serhiy Storchaka in :gh:`41779`.) + New modules =========== @@ -418,6 +509,10 @@ Improved modules argparse -------- +* The :class:`~argparse.BooleanOptionalAction` action supports now single-dash + long options and alternate prefix characters. + (Contributed by Serhiy Storchaka in :gh:`138525`.) + * Changed the *suggest_on_error* parameter of :class:`argparse.ArgumentParser` to default to ``True``. This enables suggestions for mistyped arguments by default. (Contributed by Jakob Schluse in :gh:`140450`.) @@ -461,6 +556,24 @@ collections.abc previously emitted if it was merely imported or accessed from the :mod:`!collections.abc` module. + +concurrent.futures +------------------ + +* Improved error reporting when a child process in a + :class:`concurrent.futures.ProcessPoolExecutor` terminates abruptly. + The resulting traceback will now tell you the PID and exit code of the + terminated process. + (Contributed by Jonathan Berg in :gh:`139486`.) + + +dataclasses +----------- + +* Annotations for generated ``__init__`` methods no longer include internal + type names. + + dbm --- @@ -486,6 +599,14 @@ difflib (Contributed by Jiahao Li in :gh:`134580`.) +functools +--------- + +* :func:`~functools.singledispatchmethod` now supports non-:term:`descriptor` + callables. + (Contributed by Serhiy Storchaka in :gh:`140873`.) + + hashlib ------- @@ -514,6 +635,14 @@ http.cookies (Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.) +inspect +------- + +* Add parameters *inherit_class_doc* and *fallback_to_class_doc* + for :func:`~inspect.getdoc`. + (Contributed by Serhiy Storchaka in :gh:`132686`.) + + locale ------ @@ -536,9 +665,12 @@ math mimetypes --------- +* Add ``application/node`` MIME type for ``.cjs`` extension. (Contributed by John Franey in :gh:`140937`.) * Add ``application/toml``. (Contributed by Gil Forcada in :gh:`139959`.) * Rename ``application/x-texinfo`` to ``application/texinfo``. - (Contributed by Charlie Lin in :gh:`140165`) + (Contributed by Charlie Lin in :gh:`140165`.) +* Changed the MIME type for ``.ai`` files to ``application/pdf``. + (Contributed by Stan Ulbrych in :gh:`141239`.) mmap @@ -703,6 +835,19 @@ timeit :ref:`environment variables `. (Contributed by Yi Hong in :gh:`139374`.) +tkinter +------- + +* The :meth:`!tkinter.Text.search` method now supports two additional + arguments: *nolinestop* which allows the search to + continue across line boundaries; + and *strictlimits* which restricts the search to within the specified range. + (Contributed by Rihaan Meher in :gh:`130848`) + +* A new method :meth:`!tkinter.Text.search_all` has been introduced. + This method allows for searching for all matches of a pattern + using Tcl's ``-all`` and ``-overlap`` options. + (Contributed by Rihaan Meher in :gh:`130848`) types ------ @@ -732,6 +877,17 @@ unittest (Contributed by Garry Cairns in :gh:`134567`.) +venv +---- + +* On POSIX platforms, platlib directories will be created if needed when + creating virtual environments, instead of using ``lib64 -> lib`` symlink. + This means purelib and platlib of virtual environments no longer share the + same ``lib`` directory on platforms where :data:`sys.platlibdir` is not + equal to ``lib``. + (Contributed by Rui Xi in :gh:`133951`.) + + warnings -------- @@ -744,17 +900,6 @@ warnings (Contributed by Serhiy Storchaka in :gh:`135801`.) -venv ----- - -* On POSIX platforms, platlib directories will be created if needed when - creating virtual environments, instead of using ``lib64 -> lib`` symlink. - This means purelib and platlib of virtual environments no longer share the - same ``lib`` directory on platforms where :data:`sys.platlibdir` is not - equal to ``lib``. - (Contributed by Rui Xi in :gh:`133951`.) - - xml.parsers.expat ----------------- @@ -1040,9 +1185,23 @@ New features (Contributed by Victor Stinner in :gh:`129813`.) +* Add a new :c:func:`PyImport_CreateModuleFromInitfunc` C-API for creating + a module from a *spec* and *initfunc*. + (Contributed by Itamar Oren in :gh:`116146`.) + * Add :c:func:`PyTuple_FromArray` to create a :class:`tuple` from an array. (Contributed by Victor Stinner in :gh:`111489`.) +* Add :c:func:`PyUnstable_Object_Dump` to dump an object to ``stderr``. + It should only be used for debugging. + (Contributed by Victor Stinner in :gh:`141070`.) + +* Add :c:func:`PyUnstable_ThreadState_SetStackProtection` and + :c:func:`PyUnstable_ThreadState_ResetStackProtection` functions to set + the stack protection base address and stack protection size of a Python + thread state. + (Contributed by Victor Stinner in :gh:`139653`.) + Changed C APIs -------------- @@ -1161,6 +1320,14 @@ Deprecated C APIs since 3.15 and will be removed in 3.17. (Contributed by Nikita Sobolev in :gh:`136355`.) +* :c:macro:`!Py_INFINITY` macro is :term:`soft deprecated`, + use the C11 standard ```` :c:macro:`!INFINITY` instead. + (Contributed by Sergey B Kirpichev in :gh:`141004`.) + +* :c:macro:`!Py_MATH_El` and :c:macro:`!Py_MATH_PIl` are deprecated + since 3.15 and will be removed in 3.20. + (Contributed by Sergey B Kirpichev in :gh:`141004`.) + .. Add C API deprecations above alphabetically, not here at the end. @@ -1173,6 +1340,12 @@ Build changes set to ``no`` or with :option:`!--without-system-libmpdec`. (Contributed by Sergey B Kirpichev in :gh:`115119`.) +* The new configure option :option:`--with-missing-stdlib-config=FILE` allows + distributors to pass a `JSON `_ + configuration file containing custom error messages for :term:`standard library` + modules that are missing or packaged separately. + (Contributed by Stan Ulbrych and Petr Viktorin in :gh:`139707`.) + Porting to Python 3.15 ====================== @@ -1180,7 +1353,7 @@ Porting to Python 3.15 This section lists previously described changes and other bugfixes that may require changes to your code. -* :class:`sqlite3.Connection` APIs has been cleaned up. +* :class:`sqlite3.Connection` APIs have been cleaned up. * All parameters of :func:`sqlite3.connect` except *database* are now keyword-only. * The first three parameters of methods :meth:`~sqlite3.Connection.create_function` @@ -1199,3 +1372,16 @@ that may require changes to your code. * :meth:`~mmap.mmap.resize` has been removed on platforms that don't support the underlying syscall, instead of raising a :exc:`SystemError`. + +* A resource warning is now emitted for an unclosed + :func:`xml.etree.ElementTree.iterparse` iterator if it opened a file. + Use its :meth:`!close` method or the :func:`contextlib.closing` context + manager to close it. + (Contributed by Osama Abdelkader and Serhiy Storchaka in :gh:`140601`.) + +* If a short option and a single-dash long option are passed to + :meth:`argparse.ArgumentParser.add_argument`, *dest* is now inferred from + the single-dash long option. For example, in ``add_argument('-f', '-foo')``, + *dest* is now ``'foo'`` instead of ``'f'``. + Pass an explicit *dest* argument to preserve the old behavior. + (Contributed by Serhiy Storchaka in :gh:`138697`.) diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 59afd6520c4..9f4068116e3 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -2,7 +2,7 @@ What's New In Python 3.4 **************************** -:Author: R. David Murray (Editor) +:Author: \R. David Murray (Editor) .. Rules for maintenance: diff --git a/Include/cpython/bytearrayobject.h b/Include/cpython/bytearrayobject.h index 4dddef713ce..1edd0820742 100644 --- a/Include/cpython/bytearrayobject.h +++ b/Include/cpython/bytearrayobject.h @@ -5,25 +5,25 @@ /* Object layout */ typedef struct { PyObject_VAR_HEAD - Py_ssize_t ob_alloc; /* How many bytes allocated in ob_bytes */ + /* How many bytes allocated in ob_bytes + + In the current implementation this is equivalent to Py_SIZE(ob_bytes_object). + The value is always loaded and stored atomically for thread safety. + There are API compatibilty concerns with removing so keeping for now. */ + Py_ssize_t ob_alloc; char *ob_bytes; /* Physical backing buffer */ char *ob_start; /* Logical start inside ob_bytes */ Py_ssize_t ob_exports; /* How many buffer exports */ + PyObject *ob_bytes_object; /* PyBytes for zero-copy bytes conversion */ } PyByteArrayObject; -PyAPI_DATA(char) _PyByteArray_empty_string[]; - /* Macros and static inline functions, trading safety for speed */ #define _PyByteArray_CAST(op) \ (assert(PyByteArray_Check(op)), _Py_CAST(PyByteArrayObject*, op)) static inline char* PyByteArray_AS_STRING(PyObject *op) { - PyByteArrayObject *self = _PyByteArray_CAST(op); - if (Py_SIZE(self)) { - return self->ob_start; - } - return _PyByteArray_empty_string; + return _PyByteArray_CAST(op)->ob_start; } #define PyByteArray_AS_STRING(self) PyByteArray_AS_STRING(_PyObject_CAST(self)) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index df9ec7050fc..5f2f7b6d4f5 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -39,16 +39,6 @@ Py_DEPRECATED(3.14) PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObje PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); -// Inserts `key` with a value `default_value`, if `key` is not already present -// in the dictionary. If `result` is not NULL, then the value associated -// with `key` is returned in `*result` (either the existing value, or the now -// inserted `default_value`). -// Returns: -// -1 on error -// 0 if `key` was not present and `default_value` was inserted -// 1 if `key` was present and `default_value` was not inserted -PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); - /* Get the number of items of a dictionary. */ static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { PyDictObject *mp; diff --git a/Include/cpython/import.h b/Include/cpython/import.h index 0ce0b1ee6cc..149a20af8b9 100644 --- a/Include/cpython/import.h +++ b/Include/cpython/import.h @@ -10,6 +10,13 @@ struct _inittab { PyAPI_DATA(struct _inittab *) PyImport_Inittab; PyAPI_FUNC(int) PyImport_ExtendInittab(struct _inittab *newtab); +// Custom importers may use this API to initialize statically linked +// extension modules directly from a spec and init function, +// without needing to go through inittab +PyAPI_FUNC(PyObject *) PyImport_CreateModuleFromInitfunc( + PyObject *spec, + PyObject *(*initfunc)(void)); + struct _frozen { const char *name; /* ASCII encoded string */ const unsigned char *code; diff --git a/Include/cpython/object.h b/Include/cpython/object.h index d64298232e7..8693390aeda 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -295,7 +295,10 @@ PyAPI_FUNC(PyObject *) PyType_GetDict(PyTypeObject *); PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); PyAPI_FUNC(void) _Py_BreakPoint(void); -PyAPI_FUNC(void) _PyObject_Dump(PyObject *); +PyAPI_FUNC(void) PyUnstable_Object_Dump(PyObject *); + +// Alias for backward compatibility +#define _PyObject_Dump PyUnstable_Object_Dump PyAPI_FUNC(PyObject*) _PyObject_GetAttrId(PyObject *, _Py_Identifier *); @@ -387,10 +390,11 @@ PyAPI_FUNC(PyObject *) _PyObject_FunctionStr(PyObject *); process with a message on stderr if the given condition fails to hold, but compile away to nothing if NDEBUG is defined. - However, before aborting, Python will also try to call _PyObject_Dump() on - the given object. This may be of use when investigating bugs in which a - particular object is corrupt (e.g. buggy a tp_visit method in an extension - module breaking the garbage collector), to help locate the broken objects. + However, before aborting, Python will also try to call + PyUnstable_Object_Dump() on the given object. This may be of use when + investigating bugs in which a particular object is corrupt (e.g. buggy a + tp_visit method in an extension module breaking the garbage collector), to + help locate the broken objects. The WITH_MSG variant allows you to supply an additional message that Python will attempt to print to stderr, after the object dump. */ @@ -432,8 +436,6 @@ PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed( PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); -PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count); - /* For backwards compatibility with the old trashcan mechanism */ #define Py_TRASHCAN_BEGIN(op, dealloc) #define Py_TRASHCAN_END diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 69a8b9e615e..7176f667a40 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -948,14 +948,6 @@ _Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value) memory_order_relaxed); } -static inline void -_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) -{ - _Py_USING_STD; - atomic_store_explicit((_Atomic(unsigned int)*)obj, value, - memory_order_relaxed); -} - static inline void _Py_atomic_store_long_relaxed(long *obj, long value) { @@ -1031,6 +1023,14 @@ _Py_atomic_store_int_release(int *obj, int value) memory_order_release); } +static inline void +_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned int)*)obj, value, + memory_order_release); +} + static inline void _Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value) { diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index a33ba10b8d3..dac223368db 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -7,7 +7,7 @@ /* Parameters used for the numeric hash implementation. See notes for _Py_HashDouble in Python/pyhash.c. Numeric hashes are based on - reduction modulo the prime 2**_PyHASH_BITS - 1. */ + reduction modulo the prime 2**PyHASH_BITS - 1. */ #if SIZEOF_VOID_P >= 8 # define PyHASH_BITS 61 @@ -15,7 +15,7 @@ # define PyHASH_BITS 31 #endif -#define PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) +#define PyHASH_MODULUS (((size_t)1 << PyHASH_BITS) - 1) #define PyHASH_INF 314159 #define PyHASH_IMAG PyHASH_MULTIPLIER diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index ac8798ff612..1e1e46ea4c0 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -113,6 +113,9 @@ struct _ts { /* Currently holds the GIL. Must be its own field to avoid data races */ int holds_gil; + /* Currently requesting the GIL */ + int gil_requested; + int _whence; /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). @@ -217,6 +220,15 @@ struct _ts { */ PyObject *threading_local_sentinel; _PyRemoteDebuggerSupport remote_debugger_support; + +#ifdef Py_STATS + // Pointer to PyStats structure, NULL if recording is off. For the + // free-threaded build, the structure is per-thread (stored as a pointer + // in _PyThreadStateImpl). For the default build, the structure is stored + // in the PyInterpreterState structure (threads do not have their own + // structure and all share the same per-interpreter structure). + PyStats *pystats; +#endif }; /* other API */ @@ -239,6 +251,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate); // function is set, otherwise disable them. PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate); +#ifdef Py_STATS +#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE) +extern _Py_thread_local PyThreadState *_Py_tss_tstate; + +static inline PyStats* +_PyThreadState_GetStatsFast(void) +{ + if (_Py_tss_tstate == NULL) { + return NULL; // no attached thread state + } + return _Py_tss_tstate->pystats; +} +#endif +#endif // Py_STATS + /* PyGILState */ /* Helper/diagnostic function - return 1 if the current thread @@ -252,6 +279,18 @@ PyAPI_FUNC(int) PyGILState_Check(void); */ PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void); +// Set the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(int) PyUnstable_ThreadState_SetStackProtection( + PyThreadState *tstate, + void *stack_start_addr, // Stack start address + size_t stack_size); // Stack size (in bytes) + +// Reset the stack protection start address and stack protection size +// of a Python thread state +PyAPI_FUNC(void) PyUnstable_ThreadState_ResetStackProtection( + PyThreadState *tstate); + /* Routines for advanced debuggers, requested by David Beazley. Don't use unless you know what you are doing! */ PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void); diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index cf830b6066f..1c94603c08b 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -4,7 +4,7 @@ // // - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF() // and Py_DECREF(). -// - _Py_stats variable +// - _PyStats_GET() // // Functions of the sys module: // @@ -14,7 +14,7 @@ // - sys._stats_dump() // // Python must be built with ./configure --enable-pystats to define the -// Py_STATS macro. +// _PyStats_GET() macro. // // Define _PY_INTERPRETER macro to increment interpreter_increfs and // interpreter_decrefs. Otherwise, increment increfs and decrefs. @@ -109,6 +109,18 @@ typedef struct _gc_stats { uint64_t objects_not_transitively_reachable; } GCStats; +#ifdef Py_GIL_DISABLED +// stats specific to free-threaded build +typedef struct _ft_stats { + // number of times interpreter had to spin or park when trying to acquire a mutex + uint64_t mutex_sleeps; + // number of times that the QSBR mechanism polled (compute read sequence value) + uint64_t qsbr_polls; + // number of times stop-the-world mechanism was used + uint64_t world_stops; +} FTStats; +#endif + typedef struct _uop_stats { uint64_t execution_count; uint64_t miss; @@ -138,6 +150,8 @@ typedef struct _optimization_stats { uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; uint64_t optimizer_attempts; uint64_t optimizer_successes; + uint64_t optimizer_contradiction; + uint64_t optimizer_frame_overflow; uint64_t optimizer_failure_reason_no_memory; uint64_t remove_globals_builtins_changed; uint64_t remove_globals_incorrect_keys; @@ -173,22 +187,48 @@ typedef struct _stats { CallStats call_stats; ObjectStats object_stats; OptimizationStats optimization_stats; +#ifdef Py_GIL_DISABLED + FTStats ft_stats; +#endif RareEventStats rare_event_stats; - GCStats *gc_stats; + GCStats gc_stats[3]; // must match NUM_GENERATIONS } PyStats; +// Export for most shared extensions +PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void); -// Export for shared extensions like 'math' -PyAPI_DATA(PyStats*) _Py_stats; +#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE) +// use inline function version defined in cpython/pystate.h +static inline PyStats *_PyThreadState_GetStatsFast(void); +#define _PyStats_GET _PyThreadState_GetStatsFast +#else +#define _PyStats_GET _PyStats_GetLocal +#endif + +#define _Py_STATS_EXPR(expr) \ + do { \ + PyStats *s = _PyStats_GET(); \ + if (s != NULL) { \ + s->expr; \ + } \ + } while (0) + +#define _Py_STATS_COND_EXPR(cond, expr) \ + do { \ + PyStats *s = _PyStats_GET(); \ + if (s != NULL && (cond)) { \ + s->expr; \ + } \ + } while (0) #ifdef _PY_INTERPRETER -# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0) -# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0) -# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0) -# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0) +# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++) +# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++) +# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++) +# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++) #else -# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0) -# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0) -# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0) -# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0) +# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++) +# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++) +# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++) +# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++) #endif diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 73e3bc44d6c..2853d24c34b 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -301,7 +301,6 @@ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) { /* Returns the cached hash, or -1 if not cached yet. */ static inline Py_hash_t PyUnstable_Unicode_GET_CACHED_HASH(PyObject *op) { - assert(PyUnicode_Check(op)); #ifdef Py_GIL_DISABLED return _Py_atomic_load_ssize_relaxed(&_PyASCIIObject_CAST(op)->hash); #else diff --git a/Include/datetime.h b/Include/datetime.h index b78cc0e8e2e..ed36e6e48c8 100644 --- a/Include/datetime.h +++ b/Include/datetime.h @@ -1,8 +1,8 @@ /* datetime.h */ #ifndef Py_LIMITED_API -#ifndef DATETIME_H -#define DATETIME_H +#ifndef Py_DATETIME_H +#define Py_DATETIME_H #ifdef __cplusplus extern "C" { #endif @@ -263,5 +263,5 @@ static PyDateTime_CAPI *PyDateTimeAPI = NULL; #ifdef __cplusplus } #endif -#endif +#endif /* !Py_DATETIME_H */ #endif /* !Py_LIMITED_API */ diff --git a/Include/dictobject.h b/Include/dictobject.h index 1bbeec1ab69..0384e3131dc 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -68,6 +68,18 @@ PyAPI_FUNC(int) PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result PyAPI_FUNC(int) PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030F0000 +// Inserts `key` with a value `default_value`, if `key` is not already present +// in the dictionary. If `result` is not NULL, then the value associated +// with `key` is returned in `*result` (either the existing value, or the now +// inserted `default_value`). +// Returns: +// -1 on error +// 0 if `key` was not present and `default_value` was inserted +// 1 if `key` was present and `default_value` was not inserted +PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); +#endif + #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *); #endif diff --git a/Include/exports.h b/Include/exports.h index 0c646d5beb6..62feb09ed2b 100644 --- a/Include/exports.h +++ b/Include/exports.h @@ -9,6 +9,7 @@ inside the Python core, they are private to the core. If in an extension module, it may be declared with external linkage depending on the platform. + PyMODEXPORT_FUNC: Like PyMODINIT_FUNC, but for a slots array As a number of platforms support/require "__declspec(dllimport/dllexport)", we support a HAVE_DECLSPEC_DLL macro to save duplication. @@ -62,9 +63,9 @@ /* module init functions inside the core need no external linkage */ /* except for Cygwin to handle embedding */ # if defined(__CYGWIN__) -# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# define _PyINIT_FUNC_DECLSPEC Py_EXPORTED_SYMBOL # else /* __CYGWIN__ */ -# define PyMODINIT_FUNC PyObject* +# define _PyINIT_FUNC_DECLSPEC # endif /* __CYGWIN__ */ # else /* Py_BUILD_CORE */ /* Building an extension module, or an embedded situation */ @@ -78,9 +79,9 @@ # define PyAPI_DATA(RTYPE) extern Py_IMPORTED_SYMBOL RTYPE /* module init functions outside the core must be exported */ # if defined(__cplusplus) -# define PyMODINIT_FUNC extern "C" Py_EXPORTED_SYMBOL PyObject* +# define _PyINIT_FUNC_DECLSPEC extern "C" Py_EXPORTED_SYMBOL # else /* __cplusplus */ -# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# define _PyINIT_FUNC_DECLSPEC Py_EXPORTED_SYMBOL # endif /* __cplusplus */ # endif /* Py_BUILD_CORE */ # endif /* HAVE_DECLSPEC_DLL */ @@ -93,13 +94,15 @@ #ifndef PyAPI_DATA # define PyAPI_DATA(RTYPE) extern Py_EXPORTED_SYMBOL RTYPE #endif -#ifndef PyMODINIT_FUNC +#ifndef _PyINIT_FUNC_DECLSPEC # if defined(__cplusplus) -# define PyMODINIT_FUNC extern "C" Py_EXPORTED_SYMBOL PyObject* +# define _PyINIT_FUNC_DECLSPEC extern "C" Py_EXPORTED_SYMBOL # else /* __cplusplus */ -# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# define _PyINIT_FUNC_DECLSPEC Py_EXPORTED_SYMBOL # endif /* __cplusplus */ #endif +#define PyMODINIT_FUNC _PyINIT_FUNC_DECLSPEC PyObject* +#define PyMODEXPORT_FUNC _PyINIT_FUNC_DECLSPEC PyModuleDef_Slot* #endif /* Py_EXPORTS_H */ diff --git a/Include/floatobject.h b/Include/floatobject.h index 4d24a76edd5..814337b070a 100644 --- a/Include/floatobject.h +++ b/Include/floatobject.h @@ -18,14 +18,14 @@ PyAPI_DATA(PyTypeObject) PyFloat_Type; #define Py_RETURN_NAN return PyFloat_FromDouble(Py_NAN) -#define Py_RETURN_INF(sign) \ - do { \ - if (copysign(1., sign) == 1.) { \ - return PyFloat_FromDouble(Py_INFINITY); \ - } \ - else { \ - return PyFloat_FromDouble(-Py_INFINITY); \ - } \ +#define Py_RETURN_INF(sign) \ + do { \ + if (copysign(1., sign) == 1.) { \ + return PyFloat_FromDouble(INFINITY); \ + } \ + else { \ + return PyFloat_FromDouble(-INFINITY); \ + } \ } while(0) PyAPI_FUNC(double) PyFloat_GetMax(void); diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 454c8dde031..7f60eb49508 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -22,33 +22,48 @@ extern "C" { Another use is for the Tier 2 optimizer to decide when to create a new Tier 2 trace (executor). Again, exponential backoff is used. - The 16-bit counter is structured as a 12-bit unsigned 'value' - and a 4-bit 'backoff' field. When resetting the counter, the + The 16-bit counter is structured as a 13-bit unsigned 'value' + and a 3-bit 'backoff' field. When resetting the counter, the backoff field is incremented (until it reaches a limit) and the - value is set to a bit mask representing the value 2**backoff - 1. - The maximum backoff is 12 (the number of bits in the value). + value is set to a bit mask representing some prime value - 1. + New values and backoffs for each backoff are calculated once + at compile time and saved to value_and_backoff_next table. + The maximum backoff is 6, since 7 is an UNREACHABLE_BACKOFF. There is an exceptional value which must not be updated, 0xFFFF. */ -#define BACKOFF_BITS 4 -#define MAX_BACKOFF 12 -#define UNREACHABLE_BACKOFF 15 +#define BACKOFF_BITS 3 +#define BACKOFF_MASK 7 +#define MAX_BACKOFF 6 +#define UNREACHABLE_BACKOFF 7 +#define MAX_VALUE 0x1FFF -static inline bool -is_unreachable_backoff_counter(_Py_BackoffCounter counter) -{ - return counter.value_and_backoff == UNREACHABLE_BACKOFF; -} +#define MAKE_VALUE_AND_BACKOFF(value, backoff) \ + ((value << BACKOFF_BITS) | backoff) + +// For previous backoff b we use value x such that +// x + 1 is near to 2**(2*b+1) and x + 1 is prime. +static const uint16_t value_and_backoff_next[] = { + MAKE_VALUE_AND_BACKOFF(1, 1), + MAKE_VALUE_AND_BACKOFF(6, 2), + MAKE_VALUE_AND_BACKOFF(30, 3), + MAKE_VALUE_AND_BACKOFF(126, 4), + MAKE_VALUE_AND_BACKOFF(508, 5), + MAKE_VALUE_AND_BACKOFF(2052, 6), + // We use the same backoff counter for all backoffs >= MAX_BACKOFF. + MAKE_VALUE_AND_BACKOFF(8190, 6), + MAKE_VALUE_AND_BACKOFF(8190, 6), +}; static inline _Py_BackoffCounter make_backoff_counter(uint16_t value, uint16_t backoff) { - assert(backoff <= 15); - assert(value <= 0xFFF); - _Py_BackoffCounter result; - result.value_and_backoff = (value << BACKOFF_BITS) | backoff; - return result; + assert(backoff <= UNREACHABLE_BACKOFF); + assert(value <= MAX_VALUE); + return ((_Py_BackoffCounter){ + .value_and_backoff = MAKE_VALUE_AND_BACKOFF(value, backoff) + }); } static inline _Py_BackoffCounter @@ -62,14 +77,11 @@ forge_backoff_counter(uint16_t counter) static inline _Py_BackoffCounter restart_backoff_counter(_Py_BackoffCounter counter) { - assert(!is_unreachable_backoff_counter(counter)); - int backoff = counter.value_and_backoff & 15; - if (backoff < MAX_BACKOFF) { - return make_backoff_counter((1 << (backoff + 1)) - 1, backoff + 1); - } - else { - return make_backoff_counter((1 << MAX_BACKOFF) - 1, MAX_BACKOFF); - } + uint16_t backoff = counter.value_and_backoff & BACKOFF_MASK; + assert(backoff <= MAX_BACKOFF); + return ((_Py_BackoffCounter){ + .value_and_backoff = value_and_backoff_next[backoff] + }); } static inline _Py_BackoffCounter @@ -95,12 +107,25 @@ backoff_counter_triggers(_Py_BackoffCounter counter) return counter.value_and_backoff < UNREACHABLE_BACKOFF; } +static inline _Py_BackoffCounter +trigger_backoff_counter(void) +{ + _Py_BackoffCounter result; + result.value_and_backoff = 0; + return result; +} + // Initial JUMP_BACKWARD counter. // Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is // invalidated we may construct a new trace before the bytecode has properly // re-specialized: -#define JUMP_BACKWARD_INITIAL_VALUE 4095 -#define JUMP_BACKWARD_INITIAL_BACKOFF 12 +// Note: this should be a prime number-1. This increases the likelihood of +// finding a "good" loop iteration to trace. +// For example, 4095 does not work for the nqueens benchmark on pyperformance +// as we always end up tracing the loop iteration's +// exhaustion iteration. Which aborts our current tracer. +#define JUMP_BACKWARD_INITIAL_VALUE 4000 +#define JUMP_BACKWARD_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_jump_backoff_counter(void) { @@ -112,8 +137,8 @@ initial_jump_backoff_counter(void) * Must be larger than ADAPTIVE_COOLDOWN_VALUE, * otherwise when a side exit warms up we may construct * a new trace before the Tier 1 code has properly re-specialized. */ -#define SIDE_EXIT_INITIAL_VALUE 4095 -#define SIDE_EXIT_INITIAL_BACKOFF 12 +#define SIDE_EXIT_INITIAL_VALUE 4000 +#define SIDE_EXIT_INITIAL_BACKOFF 6 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index c7bc53b6073..8e8fa696ee0 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -60,6 +60,14 @@ PyAPI_FUNC(void) _PyBytes_Repeat(char* dest, Py_ssize_t len_dest, const char* src, Py_ssize_t len_src); +/* _PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation + for a bytes object of length n should request PyBytesObject_SIZE + n bytes. + + Using _PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves + 3 or 7 bytes per bytes object allocation on a typical system. +*/ +#define _PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1) + /* --- PyBytesWriter ------------------------------------------------------ */ struct PyBytesWriter { diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h index 506da06f708..4f4cf02f64b 100644 --- a/Include/internal/pycore_call.h +++ b/Include/internal/pycore_call.h @@ -64,39 +64,6 @@ PyAPI_FUNC(PyObject*) _PyObject_CallMethod( PyObject *name, const char *format, ...); -extern PyObject* _PyObject_CallMethodIdObjArgs( - PyObject *obj, - _Py_Identifier *name, - ...); - -static inline PyObject * -_PyObject_VectorcallMethodId( - _Py_Identifier *name, PyObject *const *args, - size_t nargsf, PyObject *kwnames) -{ - PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ - if (!oname) { - return _Py_NULL; - } - return PyObject_VectorcallMethod(oname, args, nargsf, kwnames); -} - -static inline PyObject * -_PyObject_CallMethodIdNoArgs(PyObject *self, _Py_Identifier *name) -{ - size_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET; - return _PyObject_VectorcallMethodId(name, &self, nargsf, _Py_NULL); -} - -static inline PyObject * -_PyObject_CallMethodIdOneArg(PyObject *self, _Py_Identifier *name, PyObject *arg) -{ - PyObject *args[2] = {self, arg}; - size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET; - assert(arg != NULL); - return _PyObject_VectorcallMethodId(name, args, nargsf, _Py_NULL); -} - /* === Vectorcall protocol (PEP 590) ============================= */ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index f1e84592b2e..56018d9cbad 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -33,8 +33,6 @@ extern int _PyEval_SetOpcodeTrace(PyFrameObject *f, bool enable); // Export for 'array' shared extension PyAPI_FUNC(PyObject*) _PyEval_GetBuiltin(PyObject *); -extern PyObject* _PyEval_GetBuiltinId(_Py_Identifier *); - extern void _PyEval_SetSwitchInterval(unsigned long microseconds); extern unsigned long _PyEval_GetSwitchInterval(void); @@ -217,7 +215,14 @@ extern void _PyEval_DeactivateOpCache(void); static inline int _Py_MakeRecCheck(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - return here_addr < _tstate->c_stack_soft_limit; + // Overflow if stack pointer is between soft limit and the base of the hardware stack. + // If it is below the hardware stack base, assume that we have the wrong stack limits, and do nothing. + // We could have the wrong stack limits because of limited platform support, or user-space threads. +#if _Py_STACK_GROWS_DOWN + return here_addr < _tstate->c_stack_soft_limit && here_addr >= _tstate->c_stack_soft_limit - 2 * _PyOS_STACK_MARGIN_BYTES; +#else + return here_addr > _tstate->c_stack_soft_limit && here_addr <= _tstate->c_stack_soft_limit + 2 * _PyOS_STACK_MARGIN_BYTES; +#endif } // Export for '_json' shared extension, used via _Py_EnterRecursiveCall() @@ -249,9 +254,18 @@ static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); +#if _Py_STACK_GROWS_DOWN return here_addr <= _tstate->c_stack_soft_limit; +#else + return here_addr >= _tstate->c_stack_soft_limit; +#endif } +// Export for test_peg_generator +PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin( + PyThreadState *tstate, + int margin_count); + static inline void _Py_LeaveRecursiveCall(void) { } @@ -399,6 +413,66 @@ _PyForIter_VirtualIteratorNext(PyThreadState* tstate, struct _PyInterpreterFrame #define SPECIAL___AEXIT__ 3 #define SPECIAL_MAX 3 +PyAPI_DATA(const _Py_CODEUNIT *) _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS_PTR; + +/* Helper functions for large uops */ + +PyAPI_FUNC(PyObject *) +_Py_VectorCall_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args, + _PyStackRef kwnames); + +PyAPI_FUNC(PyObject *) +_Py_BuiltinCallFast_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuiltinCallFastWithKeywords_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_PyCallMethodDescriptorFast_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_PyCallMethodDescriptorFastWithKeywords_StackRefSteal( + _PyStackRef callable, + PyMethodDef *meth, + PyObject *self, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_CallBuiltinClass_StackRefSteal( + _PyStackRef callable, + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuildString_StackRefSteal( + _PyStackRef *arguments, + int total_args); + +PyAPI_FUNC(PyObject *) +_Py_BuildMap_StackRefSteal( + _PyStackRef *arguments, + int half_args); + +PyAPI_FUNC(void) +_Py_assert_within_stack_bounds( + _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, + const char *filename, int lineno); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2d7d81d491c..cb9c0aa27a1 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -274,8 +274,13 @@ extern void _PyLineTable_InitAddressRange( /** API for traversing the line number table. */ PyAPI_FUNC(int) _PyLineTable_NextAddressRange(PyCodeAddressRange *range); extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); -// This is used in dump_frame() in traceback.c without an attached tstate. -extern int _PyCode_Addr2LineNoTstate(PyCodeObject *co, int addr); + +// Similar to PyCode_Addr2Line(), but return -1 if the code object is invalid +// and can be called without an attached tstate. Used by dump_frame() in +// Python/traceback.c. The function uses heuristics to detect freed memory, +// it's not 100% reliable. +extern int _PyCode_SafeAddr2Line(PyCodeObject *co, int addr); + /** API for executors */ extern void _PyCode_Clear_Executors(PyCodeObject *code); @@ -306,8 +311,8 @@ PyAPI_FUNC(void) _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins _Py_CODEUNIT *instr, PyObject *name); PyAPI_FUNC(void) _Py_Specialize_StoreSubscr(_PyStackRef container, _PyStackRef sub, _Py_CODEUNIT *instr); -PyAPI_FUNC(void) _Py_Specialize_Call(_PyStackRef callable, _Py_CODEUNIT *instr, - int nargs); +PyAPI_FUNC(void) _Py_Specialize_Call(_PyStackRef callable, _PyStackRef self_or_null, + _Py_CODEUNIT *instr, int nargs); PyAPI_FUNC(void) _Py_Specialize_CallKw(_PyStackRef callable, _Py_CODEUNIT *instr, int nargs); PyAPI_FUNC(void) _Py_Specialize_BinaryOp(_PyStackRef lhs, _PyStackRef rhs, _Py_CODEUNIT *instr, diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index 3f9445f6ae1..911cc1f10f1 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -32,7 +32,8 @@ PyAPI_FUNC(PyCodeObject*) _PyAST_Compile( PyObject *filename, PyCompilerFlags *flags, int optimize, - struct _arena *arena); + struct _arena *arena, + PyObject *module); /* AST preprocessing */ extern int _PyCompile_AstPreprocess( @@ -41,7 +42,8 @@ extern int _PyCompile_AstPreprocess( PyCompilerFlags *flags, int optimize, struct _arena *arena, - int syntax_check_only); + int syntax_check_only, + PyObject *module); extern int _PyAST_Preprocess( struct _mod *, @@ -50,7 +52,8 @@ extern int _PyAST_Preprocess( int optimize, int ff_features, int syntax_check_only, - int enable_warnings); + int enable_warnings, + PyObject *module); typedef struct { diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 2601de40737..60b6fc4a72e 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -32,7 +32,7 @@ extern "C" { const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ PyCriticalSection _cs; \ if (_should_lock_cs) { \ - _PyCriticalSection_Begin(&_cs, _orig_seq); \ + PyCriticalSection_Begin(&_cs, _orig_seq); \ } # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ @@ -77,10 +77,10 @@ _PyCriticalSection_Resume(PyThreadState *tstate); // (private) slow path for locking the mutex PyAPI_FUNC(void) -_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m); +_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m); PyAPI_FUNC(void) -_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked); PyAPI_FUNC(void) @@ -95,34 +95,30 @@ _PyCriticalSection_IsActive(uintptr_t tag) } static inline void -_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginMutex(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m) { if (PyMutex_LockFast(m)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_mutex = m; c->_cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c; } else { - _PyCriticalSection_BeginSlow(c, m); + _PyCriticalSection_BeginSlow(tstate, c, m); } } -#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex static inline void -_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +_PyCriticalSection_Begin(PyThreadState *tstate, PyCriticalSection *c, PyObject *op) { - _PyCriticalSection_BeginMutex(c, &op->ob_mutex); + _PyCriticalSection_BeginMutex(tstate, c, &op->ob_mutex); } -#define PyCriticalSection_Begin _PyCriticalSection_Begin // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. static inline void -_PyCriticalSection_Pop(PyCriticalSection *c) +_PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c) { - PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = c->_cs_prev; tstate->critical_section = prev; @@ -132,7 +128,7 @@ _PyCriticalSection_Pop(PyCriticalSection *c) } static inline void -_PyCriticalSection_End(PyCriticalSection *c) +_PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c) { // If the mutex is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for locks already held in the top-most @@ -141,18 +137,17 @@ _PyCriticalSection_End(PyCriticalSection *c) return; } PyMutex_Unlock(c->_cs_mutex); - _PyCriticalSection_Pop(c); + _PyCriticalSection_Pop(tstate, c); } -#define PyCriticalSection_End _PyCriticalSection_End static inline void -_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +_PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { // If the two mutex arguments are the same, treat this as a critical // section with a single mutex. c->_cs_mutex2 = NULL; - _PyCriticalSection_BeginMutex(&c->_cs_base, m1); + _PyCriticalSection_BeginMutex(tstate, &c->_cs_base, m1); return; } @@ -167,7 +162,6 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if (PyMutex_LockFast(m1)) { if (PyMutex_LockFast(m2)) { - PyThreadState *tstate = _PyThreadState_GET(); c->_cs_base._cs_mutex = m1; c->_cs_mutex2 = m2; c->_cs_base._cs_prev = tstate->critical_section; @@ -176,24 +170,22 @@ _PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) tstate->critical_section = p; } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 1); } } else { - _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + _PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 0); } } -#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex static inline void -_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +_PyCriticalSection2_Begin(PyThreadState *tstate, PyCriticalSection2 *c, PyObject *a, PyObject *b) { - _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); + _PyCriticalSection2_BeginMutex(tstate, c, &a->ob_mutex, &b->ob_mutex); } -#define PyCriticalSection2_Begin _PyCriticalSection2_Begin static inline void -_PyCriticalSection2_End(PyCriticalSection2 *c) +_PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c) { // if mutex1 is NULL, we used the fast path in // _PyCriticalSection_BeginSlow for mutexes that are already held, @@ -207,9 +199,8 @@ _PyCriticalSection2_End(PyCriticalSection2 *c) PyMutex_Unlock(c->_cs_mutex2); } PyMutex_Unlock(c->_cs_base._cs_mutex); - _PyCriticalSection_Pop(&c->_cs_base); + _PyCriticalSection_Pop(tstate, &c->_cs_base); } -#define PyCriticalSection2_End _PyCriticalSection2_End static inline void _PyCriticalSection_AssertHeld(PyMutex *mutex) @@ -251,6 +242,45 @@ _PyCriticalSection_AssertHeldObj(PyObject *op) #endif } + +#undef Py_BEGIN_CRITICAL_SECTION +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op)) + +#undef Py_BEGIN_CRITICAL_SECTION_MUTEX +# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \ + { \ + PyCriticalSection _py_cs; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection_BeginMutex(_cs_tstate, &_py_cs, mutex) + +#undef Py_END_CRITICAL_SECTION +# define Py_END_CRITICAL_SECTION() \ + _PyCriticalSection_End(_cs_tstate, &_py_cs); \ + } + +#undef Py_BEGIN_CRITICAL_SECTION2 +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_Begin(_cs_tstate, &_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) + +#undef Py_BEGIN_CRITICAL_SECTION2_MUTEX +# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyThreadState *_cs_tstate = _PyThreadState_GET(); \ + _PyCriticalSection2_BeginMutex(_cs_tstate, &_py_cs2, m1, m2) + +#undef Py_END_CRITICAL_SECTION2 +# define Py_END_CRITICAL_SECTION2() \ + _PyCriticalSection2_End(_cs_tstate, &_py_cs2); \ + } + #endif /* Py_GIL_DISABLED */ #ifdef __cplusplus diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 8e7cd16acff..0f17bf17f82 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets { uint64_t native_thread_id; uint64_t datastack_chunk; uint64_t status; + uint64_t holds_gil; + uint64_t gil_requested; } thread_state; // InterpreterFrame offset; @@ -210,6 +212,7 @@ typedef struct _Py_DebugOffsets { struct _gc { uint64_t size; uint64_t collecting; + uint64_t frame; } gc; // Generator object offset; @@ -273,6 +276,8 @@ typedef struct _Py_DebugOffsets { .native_thread_id = offsetof(PyThreadState, native_thread_id), \ .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ .status = offsetof(PyThreadState, _status), \ + .holds_gil = offsetof(PyThreadState, holds_gil), \ + .gil_requested = offsetof(PyThreadState, gil_requested), \ }, \ .interpreter_frame = { \ .size = sizeof(_PyInterpreterFrame), \ @@ -351,6 +356,7 @@ typedef struct _Py_DebugOffsets { .gc = { \ .size = sizeof(struct _gc_runtime_state), \ .collecting = offsetof(struct _gc_runtime_state, collecting), \ + .frame = offsetof(struct _gc_runtime_state, frame), \ }, \ .gen_object = { \ .size = sizeof(PyGenObject), \ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 9a14340c9f9..f7bceb4605e 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -44,6 +44,7 @@ extern int _PyDict_SetItemId(PyObject *dp, _Py_Identifier *key, PyObject *item); extern int _PyDict_DelItemId(PyObject *mp, _Py_Identifier *key); extern void _PyDict_ClearKeysVersion(PyObject *mp); + extern int _PyDict_Next( PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash); diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 2a7b649f5e7..1a1bcc77ece 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -13,7 +13,7 @@ static inline void _PyStaticObject_CheckRefcnt(PyObject *obj) { if (!_Py_IsImmortal(obj)) { fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); - _PyObject_Dump(obj); + PyUnstable_Object_Dump(obj); } } #endif @@ -1326,10 +1326,12 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(gc)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(native)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); @@ -1766,6 +1768,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fullerror)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(gc)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug)); @@ -1909,6 +1912,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(native)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); @@ -2069,6 +2073,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(take_bytes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target_is_directory)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 0024a4cfd8a..0633ceeaf0c 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -46,10 +46,12 @@ struct _Py_global_strings { STRUCT_FOR_STR(dot_locals, ".") STRUCT_FOR_STR(empty, "") STRUCT_FOR_STR(format, ".format") + STRUCT_FOR_STR(gc, "") STRUCT_FOR_STR(generic_base, ".generic_base") STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(native, "") STRUCT_FOR_STR(str_replace_inf, "1e309") STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(utf_8, "utf-8") @@ -489,6 +491,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(fullerror) STRUCT_FOR_ID(func) STRUCT_FOR_ID(future) + STRUCT_FOR_ID(gc) STRUCT_FOR_ID(generation) STRUCT_FOR_ID(get) STRUCT_FOR_ID(get_debug) @@ -632,6 +635,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(name_from) STRUCT_FOR_ID(namespace_separator) STRUCT_FOR_ID(namespaces) + STRUCT_FOR_ID(native) STRUCT_FOR_ID(ndigits) STRUCT_FOR_ID(nested) STRUCT_FOR_ID(new_file_name) @@ -792,6 +796,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(take_bytes) STRUCT_FOR_ID(target) STRUCT_FOR_ID(target_is_directory) STRUCT_FOR_ID(task) diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index 23eca2dd911..c0db135a4e4 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -144,11 +144,18 @@ PyAPI_FUNC(int) _PyImport_ClearExtension(PyObject *name, PyObject *filename); // state of the module argument: // - If module is NULL or a PyModuleObject with md_gil == Py_MOD_GIL_NOT_USED, // call _PyEval_DisableGIL(). -// - Otherwise, call _PyEval_EnableGILPermanent(). If the GIL was not already -// enabled permanently, issue a warning referencing the module's name. +// - Otherwise, call _PyImport_EnableGILAndWarn // // This function may raise an exception. extern int _PyImport_CheckGILForModule(PyObject *module, PyObject *module_name); +// Assuming that the GIL is enabled from a call to +// _PyEval_EnableGILTransient(), call _PyEval_EnableGILPermanent(). +// If the GIL was not already enabled permanently, issue a warning referencing +// the module's name. +// Leave a message in verbose mode. +// +// This function may raise an exception. +extern int _PyImport_EnableGILAndWarn(PyThreadState *, PyObject *module_name); #endif #ifdef __cplusplus diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index 3ba9229cc21..f60c5510d20 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -14,6 +14,34 @@ extern "C" { extern const char *_PyImport_DynLoadFiletab[]; +#ifdef HAVE_DYNAMIC_LOADING +/* ./configure sets HAVE_DYNAMIC_LOADING if dynamic loading of modules is + supported on this platform. configure will then compile and link in one + of the dynload_*.c files, as appropriate. We will call a function in + those modules to get a function pointer to the module's init function. + + The function should return: + - The function pointer on success + - NULL with exception set if the library cannot be loaded + - NULL *without* an extension set if the library could be loaded but the + function cannot be found in it. +*/ +#ifdef MS_WINDOWS +#include +typedef FARPROC dl_funcptr; +extern dl_funcptr _PyImport_FindSharedFuncptrWindows(const char *prefix, + const char *shortname, + PyObject *pathname, + FILE *fp); +#else +typedef void (*dl_funcptr)(void); +extern dl_funcptr _PyImport_FindSharedFuncptr(const char *prefix, + const char *shortname, + const char *pathname, FILE *fp); +#endif + +#endif /* HAVE_DYNAMIC_LOADING */ + typedef enum ext_module_kind { _Py_ext_module_kind_UNKNOWN = 0, @@ -28,6 +56,11 @@ typedef enum ext_module_origin { _Py_ext_module_origin_DYNAMIC = 3, } _Py_ext_module_origin; +struct hook_prefixes { + const char *const init_prefix; + const char *const export_prefix; +}; + /* Input for loading an extension module. */ struct _Py_ext_module_loader_info { PyObject *filename; @@ -40,7 +73,7 @@ struct _Py_ext_module_loader_info { * depending on if it's builtin or not. */ PyObject *path; _Py_ext_module_origin origin; - const char *hook_prefix; + const struct hook_prefixes *hook_prefixes; const char *newcontext; }; extern void _Py_ext_module_loader_info_clear( @@ -62,7 +95,9 @@ extern int _Py_ext_module_loader_info_init_from_spec( PyObject *spec); #endif -/* The result from running an extension module's init function. */ +/* The result from running an extension module's init function. + * Not used for modules defined via PyModExport (slots array). + */ struct _Py_ext_module_loader_result { PyModuleDef *def; PyObject *module; @@ -89,10 +124,11 @@ extern void _Py_ext_module_loader_result_apply_error( /* The module init function. */ typedef PyObject *(*PyModInitFunction)(void); +typedef PyModuleDef_Slot *(*PyModExportFunction)(void); #ifdef HAVE_DYNAMIC_LOADING -extern PyModInitFunction _PyImport_GetModInitFunc( +extern int _PyImport_GetModuleExportHooks( struct _Py_ext_module_loader_info *info, - FILE *fp); + FILE *fp, PyModInitFunction *modinit, PyModExportFunction *modexport); #endif extern int _PyImport_RunModInitFunc( PyModInitFunction p0, @@ -104,8 +140,6 @@ extern int _PyImport_RunModInitFunc( #define MAXSUFFIXSIZE 12 #ifdef MS_WINDOWS -#include -typedef FARPROC dl_funcptr; #ifdef Py_DEBUG # define PYD_DEBUG_SUFFIX "_d" @@ -128,8 +162,6 @@ typedef FARPROC dl_funcptr; #define PYD_TAGGED_SUFFIX PYD_DEBUG_SUFFIX "." PYD_SOABI ".pyd" #define PYD_UNTAGGED_SUFFIX PYD_DEBUG_SUFFIX ".pyd" -#else -typedef void (*dl_funcptr)(void); #endif diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 368dafb9063..183b2d45c5e 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -153,10 +153,8 @@ typedef enum { } _PyConfigInitEnum; typedef enum { - /* For now, this means the GIL is enabled. - - gh-116329: This will eventually change to "the GIL is disabled but can - be re-enabled by loading an incompatible extension module." */ + /* In free threaded builds, this means that the GIL is disabled at startup, + but may be enabled by loading an incompatible extension module. */ _PyConfig_GIL_DEFAULT = -1, /* The GIL has been forced off or on, and will not be affected by module loading. */ diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index a147b9e5d73..9507d6e5974 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -14,8 +14,6 @@ extern "C" { #include "pycore_structs.h" // PyHamtObject #include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_typedefs.h" // _PyRuntimeState -#include "pycore_uop.h" // struct _PyUOpInstruction - #define CODE_MAX_WATCHERS 8 #define CONTEXT_MAX_WATCHERS 8 @@ -181,6 +179,10 @@ struct gc_collection_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; /* Running stats per generation */ @@ -191,6 +193,10 @@ struct gc_generation_stats { Py_ssize_t collected; /* total number of uncollectable objects (put into gc.garbage) */ Py_ssize_t uncollectable; + // Total number of objects considered for collection and traversed: + Py_ssize_t candidates; + // Duration of the collection in seconds: + double duration; }; enum _GCPhase { @@ -199,7 +205,7 @@ enum _GCPhase { }; /* If we change this, we need to change the default value in the - signature of gc.collect. */ + signature of gc.collect and change the size of PyStats.gc_stats */ #define NUM_GENERATIONS 3 struct _gc_runtime_state { @@ -214,6 +220,9 @@ struct _gc_runtime_state { struct gc_generation_stats generation_stats[NUM_GENERATIONS]; /* true if we are currently running the collector */ int collecting; + // The frame that started the current collection. It might be NULL even when + // collecting (if no Python frame is running): + _PyInterpreterFrame *frame; /* list of uncollectable objects */ PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ @@ -940,10 +949,10 @@ struct _is { PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; bool compiling; - struct _PyUOpInstruction *jit_uop_buffer; struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *cold_executor; + struct _PyExecutorObject *cold_dynamic_executor; int executor_deletion_list_remaining_capacity; size_t executor_creation_counter; _rare_events rare_events; @@ -969,6 +978,18 @@ struct _is { # ifdef Py_STACKREF_CLOSE_DEBUG _Py_hashtable_t *closed_stackrefs_table; # endif +#endif + +#ifdef Py_STATS + // true if recording of pystats is on, this is used when new threads + // are created to decide if recording should be on for them + int pystats_enabled; + // allocated when (and if) stats are first enabled + PyStats *pystats_struct; +#ifdef Py_GIL_DISABLED + // held when pystats related interpreter state is being updated + PyMutex pystats_mutex; +#endif #endif /* the initial PyInterpreterState.threads.head */ diff --git a/Include/internal/pycore_interpframe.h b/Include/internal/pycore_interpframe.h index 2ee3696317c..8949d6cc2fc 100644 --- a/Include/internal/pycore_interpframe.h +++ b/Include/internal/pycore_interpframe.h @@ -24,6 +24,36 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { return (PyCodeObject *)executable; } +// Similar to _PyFrame_GetCode(), but return NULL if the frame is invalid or +// freed. Used by dump_frame() in Python/traceback.c. The function uses +// heuristics to detect freed memory, it's not 100% reliable. +static inline PyCodeObject* +_PyFrame_SafeGetCode(_PyInterpreterFrame *f) +{ + // globals and builtins may be NULL on a legit frame, but it's unlikely. + // It's more likely that it's a sign of an invalid frame. + if (f->f_globals == NULL || f->f_builtins == NULL) { + return NULL; + } + + if (PyStackRef_IsNull(f->f_executable)) { + return NULL; + } + void *ptr; + memcpy(&ptr, &f->f_executable, sizeof(f->f_executable)); + if (_PyMem_IsPtrFreed(ptr)) { + return NULL; + } + PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); + if (_PyObject_IsFreed(executable)) { + return NULL; + } + if (!PyCode_Check(executable)) { + return NULL; + } + return (PyCodeObject *)executable; +} + static inline _Py_CODEUNIT * _PyFrame_GetBytecode(_PyInterpreterFrame *f) { @@ -37,6 +67,31 @@ _PyFrame_GetBytecode(_PyInterpreterFrame *f) #endif } +// Similar to PyUnstable_InterpreterFrame_GetLasti(), but return NULL if the +// frame is invalid or freed. Used by dump_frame() in Python/traceback.c. The +// function uses heuristics to detect freed memory, it's not 100% reliable. +static inline int +_PyFrame_SafeGetLasti(struct _PyInterpreterFrame *f) +{ + // Code based on _PyFrame_GetBytecode() but replace _PyFrame_GetCode() + // with _PyFrame_SafeGetCode(). + PyCodeObject *co = _PyFrame_SafeGetCode(f); + if (co == NULL) { + return -1; + } + + _Py_CODEUNIT *bytecode; +#ifdef Py_GIL_DISABLED + _PyCodeArray *tlbc = _PyCode_GetTLBCArray(co); + assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size); + bytecode = (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index]; +#else + bytecode = _PyCode_CODE(co); +#endif + + return (int)(f->instr_ptr - bytecode) * sizeof(_Py_CODEUNIT); +} + static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) { PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj); assert(PyFunction_Check(func)); diff --git a/Include/internal/pycore_interpframe_structs.h b/Include/internal/pycore_interpframe_structs.h index 835b8e58194..38510685f40 100644 --- a/Include/internal/pycore_interpframe_structs.h +++ b/Include/internal/pycore_interpframe_structs.h @@ -24,7 +24,6 @@ enum _frameowner { FRAME_OWNED_BY_GENERATOR = 1, FRAME_OWNED_BY_FRAME_OBJECT = 2, FRAME_OWNED_BY_INTERPRETER = 3, - FRAME_OWNED_BY_CSTACK = 4, }; struct _PyInterpreterFrame { diff --git a/Include/internal/pycore_jit.h b/Include/internal/pycore_jit.h index 8a88cbf607b..b1550a6ddcf 100644 --- a/Include/internal/pycore_jit.h +++ b/Include/internal/pycore_jit.h @@ -13,6 +13,9 @@ extern "C" { # error "this header requires Py_BUILD_CORE define" #endif +/* To be able to reason about code layout and branches, keep code size below 1 MB */ +#define PY_MAX_JIT_CODE_SIZE ((1 << 20)-1) + #ifdef _Py_JIT typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate); diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index cfaf84181d5..0dd18ff3cb0 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -286,7 +286,8 @@ Known values: Python 3.15a1 3653 (Fix handling of opcodes that may leave operands on the stack when optimizing LOAD_FAST) Python 3.15a1 3654 (Fix missing exception handlers in logical expression) Python 3.15a1 3655 (Fix miscompilation of some module-level annotations) - Python 3.15a1 3656 Lazy imports IMPORT_NAME opcode changes + Python 3.15a1 3656 (Add TRACE_RECORD instruction, for platforms with switch based interpreter) + Python 3.15a1 3657 Lazy imports IMPORT_NAME opcode changes Python 3.16 will start with 3700 @@ -300,7 +301,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3657 +#define PYC_MAGIC_NUMBER 3658 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. */ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h index 89131c6f0a1..15a68523f47 100644 --- a/Include/internal/pycore_moduleobject.h +++ b/Include/internal/pycore_moduleobject.h @@ -1,5 +1,8 @@ #ifndef Py_INTERNAL_MODULEOBJECT_H #define Py_INTERNAL_MODULEOBJECT_H + +#include + #ifdef __cplusplus extern "C" { #endif @@ -16,34 +19,51 @@ extern int _PyModule_IsPossiblyShadowing(PyObject *); extern int _PyModule_IsExtension(PyObject *obj); +typedef int (*_Py_modexecfunc)(PyObject *); + typedef struct { PyObject_HEAD PyObject *md_dict; - PyModuleDef *md_def; void *md_state; PyObject *md_weaklist; // for logging purposes after md_dict is cleared PyObject *md_name; // module version we last checked for lazy values uint32_t m_dict_version; + bool md_token_is_def; /* if true, `md_token` is the PyModuleDef */ #ifdef Py_GIL_DISABLED - void *md_gil; + bool md_requires_gil; #endif + Py_ssize_t md_state_size; + traverseproc md_state_traverse; + inquiry md_state_clear; + freefunc md_state_free; + void *md_token; + _Py_modexecfunc md_exec; /* only set if md_token_is_def is true */ } PyModuleObject; -static inline PyModuleDef* _PyModule_GetDef(PyObject *mod) { - assert(PyModule_Check(mod)); - return ((PyModuleObject *)mod)->md_def; +#define _PyModule_CAST(op) \ + (assert(PyModule_Check(op)), _Py_CAST(PyModuleObject*, (op))) + +static inline PyModuleDef *_PyModule_GetDefOrNull(PyObject *arg) { + PyModuleObject *mod = _PyModule_CAST(arg); + if (mod->md_token_is_def) { + return (PyModuleDef *)mod->md_token; + } + return NULL; +} + +static inline PyModuleDef *_PyModule_GetToken(PyObject *arg) { + PyModuleObject *mod = _PyModule_CAST(arg); + return (PyModuleDef *)mod->md_token; } static inline void* _PyModule_GetState(PyObject* mod) { - assert(PyModule_Check(mod)); - return ((PyModuleObject *)mod)->md_state; + return _PyModule_CAST(mod)->md_state; } static inline PyObject* _PyModule_GetDict(PyObject *mod) { - assert(PyModule_Check(mod)); - PyObject *dict = ((PyModuleObject *)mod) -> md_dict; + PyObject *dict = _PyModule_CAST(mod)->md_dict; // _PyModule_GetDict(mod) must not be used after calling module_clear(mod) assert(dict != NULL); return dict; // borrowed reference diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 980d6d7764b..fb50acd62da 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -863,8 +863,7 @@ static inline Py_hash_t _PyObject_HashFast(PyObject *op) { if (PyUnicode_CheckExact(op)) { - Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED( - _PyASCIIObject_CAST(op)->hash); + Py_hash_t hash = PyUnstable_Unicode_GET_CACHED_HASH(op); if (hash != -1) { return hash; } diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index bd6b84ec7fd..cca88818c57 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -488,6 +488,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 1; case TO_BOOL_STR: return 1; + case TRACE_RECORD: + return 0; case UNARY_INVERT: return 1; case UNARY_NEGATIVE: @@ -971,6 +973,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case TO_BOOL_STR: return 1; + case TRACE_RECORD: + return 0; case UNARY_INVERT: return 1; case UNARY_NEGATIVE: @@ -1031,6 +1035,8 @@ enum InstructionFormat { #define HAS_ERROR_NO_POP_FLAG (4096) #define HAS_NO_SAVE_IP_FLAG (8192) #define HAS_PERIODIC_FLAG (16384) +#define HAS_UNPREDICTABLE_JUMP_FLAG (32768) +#define HAS_NEEDS_GUARD_IP_FLAG (65536) #define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) #define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) #define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) @@ -1046,6 +1052,8 @@ enum InstructionFormat { #define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) #define OPCODE_HAS_NO_SAVE_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NO_SAVE_IP_FLAG)) #define OPCODE_HAS_PERIODIC(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PERIODIC_FLAG)) +#define OPCODE_HAS_UNPREDICTABLE_JUMP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_UNPREDICTABLE_JUMP_FLAG)) +#define OPCODE_HAS_NEEDS_GUARD_IP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NEEDS_GUARD_IP_FLAG)) #define OPARG_SIMPLE 0 #define OPARG_CACHE_1 1 @@ -1062,7 +1070,7 @@ enum InstructionFormat { struct opcode_metadata { uint8_t valid_entry; uint8_t instr_format; - uint16_t flags; + uint32_t flags; }; extern const struct opcode_metadata _PyOpcode_opcode_metadata[267]; @@ -1077,7 +1085,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG }, [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG }, + [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_LIST_SLICE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, @@ -1094,22 +1102,22 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BUILD_TEMPLATE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, - [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_BUILTIN_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_INTRINSIC_1] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_ISINSTANCE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1117,8 +1125,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, @@ -1143,7 +1151,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [DELETE_SUBSCR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [END_FOR] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG }, [END_SEND] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1151,11 +1159,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, - [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, + [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, + [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG }, [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [GET_ANEXT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [GET_AWAITABLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1164,13 +1172,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_END_ASYNC_FOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG }, [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1183,8 +1191,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1197,7 +1205,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG }, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, @@ -1205,7 +1213,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1253,10 +1261,10 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [RESERVED] = { true, INSTR_FMT_IX, 0 }, [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, - [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, - [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_UNPREDICTABLE_JUMP_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, + [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [SETUP_ANNOTATIONS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [SET_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, @@ -1283,6 +1291,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [TRACE_RECORD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, @@ -1292,7 +1301,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG }, [ANNOTATIONS_PLACEHOLDER] = { true, -1, HAS_PURE_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_IF_FALSE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1406,6 +1415,9 @@ _PyOpcode_macro_expansion[256] = { [IMPORT_FROM] = { .nuops = 1, .uops = { { _IMPORT_FROM, OPARG_SIMPLE, 0 } } }, [IMPORT_NAME] = { .nuops = 1, .uops = { { _IMPORT_NAME, OPARG_SIMPLE, 0 } } }, [IS_OP] = { .nuops = 1, .uops = { { _IS_OP, OPARG_SIMPLE, 0 } } }, + [JUMP_BACKWARD] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } }, + [JUMP_BACKWARD_NO_INTERRUPT] = { .nuops = 1, .uops = { { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 0 } } }, + [JUMP_BACKWARD_NO_JIT] = { .nuops = 2, .uops = { { _CHECK_PERIODIC, OPARG_SIMPLE, 1 }, { _JUMP_BACKWARD_NO_INTERRUPT, OPARG_REPLACED, 1 } } }, [LIST_APPEND] = { .nuops = 1, .uops = { { _LIST_APPEND, OPARG_SIMPLE, 0 } } }, [LIST_EXTEND] = { .nuops = 1, .uops = { { _LIST_EXTEND, OPARG_SIMPLE, 0 } } }, [LOAD_ATTR] = { .nuops = 1, .uops = { { _LOAD_ATTR, OPARG_SIMPLE, 8 } } }, @@ -1731,6 +1743,7 @@ const char *_PyOpcode_OpName[267] = { [TO_BOOL_LIST] = "TO_BOOL_LIST", [TO_BOOL_NONE] = "TO_BOOL_NONE", [TO_BOOL_STR] = "TO_BOOL_STR", + [TRACE_RECORD] = "TRACE_RECORD", [UNARY_INVERT] = "UNARY_INVERT", [UNARY_NEGATIVE] = "UNARY_NEGATIVE", [UNARY_NOT] = "UNARY_NOT", @@ -1802,7 +1815,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [230] = 230, [231] = 231, [232] = 232, - [233] = 233, [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, @@ -2018,6 +2030,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [TO_BOOL_LIST] = TO_BOOL, [TO_BOOL_NONE] = TO_BOOL, [TO_BOOL_STR] = TO_BOOL, + [TRACE_RECORD] = TRACE_RECORD, [UNARY_INVERT] = UNARY_INVERT, [UNARY_NEGATIVE] = UNARY_NEGATIVE, [UNARY_NOT] = UNARY_NOT, @@ -2063,7 +2076,6 @@ const uint8_t _PyOpcode_Deopt[256] = { case 230: \ case 231: \ case 232: \ - case 233: \ ; struct pseudo_targets { uint8_t as_sequence; diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 8ed5436eb68..e7177552cf6 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -21,14 +21,6 @@ typedef struct _PyExecutorLinkListNode { } _PyExecutorLinkListNode; -/* Bloom filter with m = 256 - * https://en.wikipedia.org/wiki/Bloom_filter */ -#define _Py_BLOOM_FILTER_WORDS 8 - -typedef struct { - uint32_t bits[_Py_BLOOM_FILTER_WORDS]; -} _PyBloomFilter; - typedef struct { uint8_t opcode; uint8_t oparg; @@ -44,7 +36,9 @@ typedef struct { typedef struct _PyExitData { uint32_t target; - uint16_t index; + uint16_t index:14; + uint16_t is_dynamic:1; + uint16_t is_control_flow:1; _Py_BackoffCounter temperature; struct _PyExecutorObject *executor; } _PyExitData; @@ -94,9 +88,8 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp); // This value is arbitrary and was not optimized. #define JIT_CLEANUP_THRESHOLD 1000 -#define TRACE_STACK_SIZE 5 - -int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, +int _Py_uop_analyze_and_optimize( + PyFunctionObject *func, _PyUOpInstruction *trace, int trace_len, int curr_stackentries, _PyBloomFilter *dependencies); @@ -130,7 +123,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) #define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5) // Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) -#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) +#define MAX_ABSTRACT_FRAME_DEPTH (16) // The maximum number of side exits that we can take before requiring forward // progress (and inserting a new ENTER_EXECUTOR instruction). In practice, this @@ -258,6 +251,7 @@ struct _Py_UOpsAbstractFrame { int stack_len; int locals_len; PyFunctionObject *func; + PyCodeObject *code; JitOptRef *stack_pointer; JitOptRef *stack; @@ -333,11 +327,11 @@ extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( int curr_stackentries, JitOptRef *args, int arg_len); -extern int _Py_uop_frame_pop(JitOptContext *ctx); +extern int _Py_uop_frame_pop(JitOptContext *ctx, PyCodeObject *co, int curr_stackentries); PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); -PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, _PyExecutorObject **exec_ptr, int chain_depth); +PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, PyThreadState *tstate); static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) { @@ -346,6 +340,7 @@ static inline _PyExecutorObject *_PyExecutor_FromExit(_PyExitData *exit) } extern _PyExecutorObject *_PyExecutor_GetColdExecutor(void); +extern _PyExecutorObject *_PyExecutor_GetColdDynamicExecutor(void); PyAPI_FUNC(void) _PyExecutor_ClearExit(_PyExitData *exit); @@ -354,7 +349,9 @@ static inline int is_terminator(const _PyUOpInstruction *uop) int opcode = uop->opcode; return ( opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP + opcode == _DEOPT || + opcode == _JUMP_TO_TOP || + opcode == _DYNAMIC_EXIT ); } @@ -365,6 +362,18 @@ PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp); #endif +int _PyJit_translate_single_bytecode_to_trace(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *next_instr, int stop_tracing_opcode); + +PyAPI_FUNC(int) +_PyJit_TryInitializeTracing(PyThreadState *tstate, _PyInterpreterFrame *frame, + _Py_CODEUNIT *curr_instr, _Py_CODEUNIT *start_instr, + _Py_CODEUNIT *close_loop_instr, int curr_stackdepth, int chain_depth, _PyExitData *exit, + int oparg); + +void _PyJit_FinalizeTracing(PyThreadState *tstate); + +void _PyJit_Tracer_InvalidateDependency(PyThreadState *old_tstate, void *obj); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_parser.h b/Include/internal/pycore_parser.h index 2885dee63dc..2c46f59ab7d 100644 --- a/Include/internal/pycore_parser.h +++ b/Include/internal/pycore_parser.h @@ -48,7 +48,8 @@ extern struct _mod* _PyParser_ASTFromString( PyObject* filename, int mode, PyCompilerFlags *flags, - PyArena *arena); + PyArena *arena, + PyObject *module); extern struct _mod* _PyParser_ASTFromFile( FILE *fp, diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index c31c3365700..2ae0185226f 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -77,6 +77,10 @@ extern "C" { _Py_atomic_store_ushort_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_LOAD_INT(value) \ + _Py_atomic_load_int(&value) +#define FT_ATOMIC_STORE_INT(value, new_value) \ + _Py_atomic_store_int(&value, new_value) #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ _Py_atomic_store_int_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_INT_RELAXED(value) \ @@ -144,6 +148,8 @@ extern "C" { #define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value #define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT(value) value +#define FT_ATOMIC_STORE_INT(value, new_value) value = new_value #define FT_ATOMIC_LOAD_INT_RELAXED(value) value #define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index 2c2048f7e12..f80808fcc8c 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -123,7 +123,8 @@ extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception); extern PyObject* _PyErr_NoMemory(PyThreadState *tstate); extern int _PyErr_EmitSyntaxWarning(PyObject *msg, PyObject *filename, int lineno, int col_offset, - int end_lineno, int end_col_offset); + int end_lineno, int end_col_offset, + PyObject *module); extern void _PyErr_RaiseSyntaxError(PyObject *msg, PyObject *filename, int lineno, int col_offset, int end_lineno, int end_col_offset); diff --git a/Include/internal/pycore_pymath.h b/Include/internal/pycore_pymath.h index eea8996ba68..4fcac3aab8b 100644 --- a/Include/internal/pycore_pymath.h +++ b/Include/internal/pycore_pymath.h @@ -33,7 +33,7 @@ extern "C" { static inline void _Py_ADJUST_ERANGE1(double x) { if (errno == 0) { - if (x == Py_INFINITY || x == -Py_INFINITY) { + if (x == INFINITY || x == -INFINITY) { errno = ERANGE; } } @@ -44,8 +44,8 @@ static inline void _Py_ADJUST_ERANGE1(double x) static inline void _Py_ADJUST_ERANGE2(double x, double y) { - if (x == Py_INFINITY || x == -Py_INFINITY || - y == Py_INFINITY || y == -Py_INFINITY) + if (x == INFINITY || x == -INFINITY || + y == INFINITY || y == -INFINITY) { if (errno == 0) { errno = ERANGE; diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index f3f2ae0a140..05484e847f1 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -54,20 +54,43 @@ static inline int _PyMem_IsPtrFreed(const void *ptr) { uintptr_t value = (uintptr_t)ptr; #if SIZEOF_VOID_P == 8 - return (value == 0 + return (value <= 0xff // NULL, 0x1, 0x2, ..., 0xff || value == (uintptr_t)0xCDCDCDCDCDCDCDCD || value == (uintptr_t)0xDDDDDDDDDDDDDDDD - || value == (uintptr_t)0xFDFDFDFDFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFDFDFDFDFD + || value >= (uintptr_t)0xFFFFFFFFFFFFFF00); // -0xff, ..., -2, -1 #elif SIZEOF_VOID_P == 4 - return (value == 0 + return (value <= 0xff || value == (uintptr_t)0xCDCDCDCD || value == (uintptr_t)0xDDDDDDDD - || value == (uintptr_t)0xFDFDFDFD); + || value == (uintptr_t)0xFDFDFDFD + || value >= (uintptr_t)0xFFFFFF00); #else # error "unknown pointer size" #endif } +// Similar to _PyMem_IsPtrFreed() but expects an 'unsigned long' instead of a +// pointer. +static inline int _PyMem_IsULongFreed(unsigned long value) +{ +#if SIZEOF_LONG == 8 + return (value == 0 + || value == (unsigned long)0xCDCDCDCDCDCDCDCD + || value == (unsigned long)0xDDDDDDDDDDDDDDDD + || value == (unsigned long)0xFDFDFDFDFDFDFDFD + || value == (unsigned long)0xFFFFFFFFFFFFFFFF); +#elif SIZEOF_LONG == 4 + return (value == 0 + || value == (unsigned long)0xCDCDCDCD + || value == (unsigned long)0xDDDDDDDD + || value == (unsigned long)0xFDFDFDFD + || value == (unsigned long)0xFFFFFFFF); +#else +# error "unknown long size" +#endif +} + extern int _PyMem_GetAllocatorName( const char *name, PyMemAllocatorName *allocator); diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index cab458f8402..189a8dde9f0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -331,7 +331,11 @@ _Py_RecursionLimit_GetMargin(PyThreadState *tstate) _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; assert(_tstate->c_stack_hard_limit != 0); intptr_t here_addr = _Py_get_machine_stack_pointer(); +#if _Py_STACK_GROWS_DOWN return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, _PyOS_STACK_MARGIN_SHIFT); +#else + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (intptr_t)_tstate->c_stack_soft_limit - here_addr, _PyOS_STACK_MARGIN_SHIFT); +#endif } #ifdef __cplusplus diff --git a/Include/internal/pycore_pystats.h b/Include/internal/pycore_pystats.h index f8af398a560..50ab21aa0f1 100644 --- a/Include/internal/pycore_pystats.h +++ b/Include/internal/pycore_pystats.h @@ -9,7 +9,7 @@ extern "C" { #endif #ifdef Py_STATS -extern void _Py_StatsOn(void); +extern int _Py_StatsOn(void); extern void _Py_StatsOff(void); extern void _Py_StatsClear(void); extern int _Py_PrintSpecializationStats(int to_file); diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index c2832098ddb..04a557e1204 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -33,6 +33,12 @@ extern const char* _Py_SourceAsString( PyCompilerFlags *cf, PyObject **cmd_copy); +extern PyObject * _Py_CompileStringObjectWithModule( + const char *str, + PyObject *filename, int start, + PyCompilerFlags *flags, int optimize, + PyObject *module); + /* Stack size, in "pointers". This must be large enough, so * no two calls to check recursion depth are more than this far @@ -54,6 +60,12 @@ extern const char* _Py_SourceAsString( # define _PyOS_STACK_MARGIN_SHIFT (_PyOS_LOG2_STACK_MARGIN + 2) #endif +#ifdef _Py_THREAD_SANITIZER +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 6) +#else +# define _PyOS_MIN_STACK_SIZE (_PyOS_STACK_MARGIN_BYTES * 3) +#endif + #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 5eddb24b854..c0f60925a0f 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1321,10 +1321,12 @@ extern "C" { INIT_STR(dot_locals, "."), \ INIT_STR(empty, ""), \ INIT_STR(format, ".format"), \ + INIT_STR(gc, ""), \ INIT_STR(generic_base, ".generic_base"), \ INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(list_err, "list index out of range"), \ + INIT_STR(native, ""), \ INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(type_params, ".type_params"), \ INIT_STR(utf_8, "utf-8"), \ @@ -1764,6 +1766,7 @@ extern "C" { INIT_ID(fullerror), \ INIT_ID(func), \ INIT_ID(future), \ + INIT_ID(gc), \ INIT_ID(generation), \ INIT_ID(get), \ INIT_ID(get_debug), \ @@ -1907,6 +1910,7 @@ extern "C" { INIT_ID(name_from), \ INIT_ID(namespace_separator), \ INIT_ID(namespaces), \ + INIT_ID(native), \ INIT_ID(ndigits), \ INIT_ID(nested), \ INIT_ID(new_file_name), \ @@ -2067,6 +2071,7 @@ extern "C" { INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ + INIT_ID(take_bytes), \ INIT_ID(target), \ INIT_ID(target_is_directory), \ INIT_ID(task), \ diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 15a703a0820..e59611c07fa 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -63,6 +63,8 @@ PyAPI_FUNC(PyObject *) _Py_stackref_get_object(_PyStackRef ref); PyAPI_FUNC(PyObject *) _Py_stackref_close(_PyStackRef ref, const char *filename, int linenumber); PyAPI_FUNC(_PyStackRef) _Py_stackref_create(PyObject *obj, uint16_t flags, const char *filename, int linenumber); PyAPI_FUNC(void) _Py_stackref_record_borrow(_PyStackRef ref, const char *filename, int linenumber); +PyAPI_FUNC(_PyStackRef) _Py_stackref_get_borrowed_from(_PyStackRef ref, const char *filename, int linenumber); +PyAPI_FUNC(void) _Py_stackref_set_borrowed_from(_PyStackRef ref, _PyStackRef borrowed_from, const char *filename, int linenumber); extern void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _PyStackRef ref); static const _PyStackRef PyStackRef_NULL = { .index = 0 }; @@ -248,7 +250,12 @@ _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) } else { flags = Py_TAG_REFCNT; } - return _Py_stackref_create(obj, flags, filename, linenumber); + _PyStackRef new_ref = _Py_stackref_create(obj, flags, filename, linenumber); + if (flags == Py_TAG_REFCNT && !_Py_IsImmortal(obj)) { + _PyStackRef borrowed_from = _Py_stackref_get_borrowed_from(ref, filename, linenumber); + _Py_stackref_set_borrowed_from(new_ref, borrowed_from, filename, linenumber); + } + return new_ref; } #define PyStackRef_DUP(REF) _PyStackRef_DUP(REF, __FILE__, __LINE__) @@ -259,6 +266,7 @@ _PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct, const char * assert(!PyStackRef_IsNull(ref)); assert(!PyStackRef_IsTaggedInt(ref)); PyObject *obj = _Py_stackref_close(ref, filename, linenumber); + assert(Py_REFCNT(obj) > 0); if (PyStackRef_RefcountOnObject(ref)) { _Py_DECREF_SPECIALIZED(obj, destruct); } @@ -274,7 +282,11 @@ _PyStackRef_Borrow(_PyStackRef ref, const char *filename, int linenumber) return ref; } PyObject *obj = _Py_stackref_get_object(ref); - return _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber); + _PyStackRef new_ref = _Py_stackref_create(obj, Py_TAG_REFCNT, filename, linenumber); + if (!_Py_IsImmortal(obj)) { + _Py_stackref_set_borrowed_from(new_ref, ref, filename, linenumber); + } + return new_ref; } #define PyStackRef_Borrow(REF) _PyStackRef_Borrow((REF), __FILE__, __LINE__) @@ -310,13 +322,22 @@ PyStackRef_IsHeapSafe(_PyStackRef ref) static inline _PyStackRef _PyStackRef_MakeHeapSafe(_PyStackRef ref, const char *filename, int linenumber) { - if (PyStackRef_IsHeapSafe(ref)) { + // Special references that can't be closed. + if (ref.index < INITIAL_STACKREF_INDEX) { return ref; } + bool heap_safe = PyStackRef_IsHeapSafe(ref); PyObject *obj = _Py_stackref_close(ref, filename, linenumber); - Py_INCREF(obj); - return _Py_stackref_create(obj, 0, filename, linenumber); + uint16_t flags = 0; + if (heap_safe) { + // Close old ref and create a new one with the same flags. + // This is necessary for correct borrow checking. + flags = ref.index & Py_TAG_BITS; + } else { + Py_INCREF(obj); + } + return _Py_stackref_create(obj, flags, filename, linenumber); } #define PyStackRef_MakeHeapSafe(REF) _PyStackRef_MakeHeapSafe(REF, __FILE__, __LINE__) diff --git a/Include/internal/pycore_stats.h b/Include/internal/pycore_stats.h index 24f239a2135..850e6ea4552 100644 --- a/Include/internal/pycore_stats.h +++ b/Include/internal/pycore_stats.h @@ -15,39 +15,56 @@ extern "C" { #include "pycore_bitutils.h" // _Py_bit_length -#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0) -#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0) -#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0) -#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0) -#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0) -#define OBJECT_STAT_INC_COND(name, cond) \ - do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0) -#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0) -#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \ - do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0) -#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) -#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) -#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0) -#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) -#define UOP_PAIR_INC(uopcode, lastuop) \ - do { \ - if (lastuop && _Py_stats) { \ - _Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \ - } \ - lastuop = uopcode; \ - } while (0) -#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) -#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0) -#define OPT_HIST(length, name) \ +#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++) +#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--) +#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++) +#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++) +#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++) +#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++) +#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++) +#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++) +#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n)) +#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++) +#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n)) +#define UOP_STAT_INC(opname, name) \ do { \ - if (_Py_stats) { \ - int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \ - bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \ - _Py_stats->optimization_stats.name[bucket]++; \ + PyStats *s = _PyStats_GET(); \ + if (s) { \ + assert(opname < 512); \ + s->optimization_stats.opcode[opname].name++; \ } \ } while (0) -#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0) -#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0) +#define UOP_PAIR_INC(uopcode, lastuop) \ + do { \ + PyStats *s = _PyStats_GET(); \ + if (lastuop && s) { \ + s->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \ + } \ + lastuop = uopcode; \ + } while (0) +#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++) +#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++) +#define OPT_HIST(length, name) \ + do { \ + PyStats *s = _PyStats_GET(); \ + if (s) { \ + int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \ + bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \ + s->optimization_stats.name[bucket]++; \ + } \ + } while (0) +#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++) +#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++) + +#ifdef Py_GIL_DISABLED +#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++) +#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++) +#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++) +#else +#define FT_STAT_MUTEX_SLEEP_INC() +#define FT_STAT_QSBR_POLL_INC() +#define FT_STAT_WORLD_STOP_INC() +#endif // Export for '_opcode' shared extension PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); @@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define OPT_HIST(length, name) ((void)0) #define RARE_EVENT_STAT_INC(name) ((void)0) #define OPCODE_DEFERRED_INC(opname) ((void)0) +#define FT_STAT_MUTEX_SLEEP_INC() +#define FT_STAT_QSBR_POLL_INC() +#define FT_STAT_WORLD_STOP_INC() #endif // !Py_STATS @@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); RARE_EVENT_INTERP_INC(interp, name); \ } while (0); \ +PyStatus _PyStats_InterpInit(PyInterpreterState *); +bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *); +void _PyStats_ThreadFini(_PyThreadStateImpl *); +void _PyStats_Attach(_PyThreadStateImpl *); +void _PyStats_Detach(_PyThreadStateImpl *); #ifdef __cplusplus } diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h index 88189228e1a..ac40e5d6eee 100644 --- a/Include/internal/pycore_symtable.h +++ b/Include/internal/pycore_symtable.h @@ -189,7 +189,8 @@ extern struct symtable* _Py_SymtableStringObjectFlags( const char *str, PyObject *filename, int start, - PyCompilerFlags *flags); + PyCompilerFlags *flags, + PyObject *module); int _PyFuture_FromAST( struct _mod * mod, diff --git a/Include/internal/pycore_time.h b/Include/internal/pycore_time.h index 23312471c65..b671225ca6e 100644 --- a/Include/internal/pycore_time.h +++ b/Include/internal/pycore_time.h @@ -147,11 +147,6 @@ extern int _PyTime_FromSecondsDouble( // Clamp to [PyTime_MIN; PyTime_MAX] on overflow. extern PyTime_t _PyTime_FromMicrosecondsClamp(PyTime_t us); -// Create a timestamp from a Python int object (number of nanoseconds). -// Export for '_lsprof' shared extension. -PyAPI_FUNC(int) _PyTime_FromLong(PyTime_t *t, - PyObject *obj); - // Convert a number of seconds (Python float or int) to a timestamp. // Raise an exception and return -1 on error, return 0 on success. // Export for '_socket' shared extension. @@ -182,10 +177,6 @@ extern PyTime_t _PyTime_As100Nanoseconds(PyTime_t t, _PyTime_round_t round); #endif -// Convert a timestamp (number of nanoseconds) as a Python int object. -// Export for '_testinternalcapi' shared extension. -PyAPI_FUNC(PyObject*) _PyTime_AsLong(PyTime_t t); - #ifndef MS_WINDOWS // Create a timestamp from a timeval structure. // Raise an exception and return -1 on overflow, return 0 on success. diff --git a/Include/internal/pycore_tracemalloc.h b/Include/internal/pycore_tracemalloc.h index 572e8025876..693385f9a46 100644 --- a/Include/internal/pycore_tracemalloc.h +++ b/Include/internal/pycore_tracemalloc.h @@ -30,8 +30,8 @@ struct _PyTraceMalloc_Config { }; -/* Pack the frame_t structure to reduce the memory footprint on 64-bit - architectures: 12 bytes instead of 16. */ +/* Pack the tracemalloc_frame and tracemalloc_traceback structures to reduce + the memory footprint on 64-bit architectures: 12 bytes instead of 16. */ #if defined(_MSC_VER) #pragma pack(push, 4) #endif @@ -46,18 +46,22 @@ tracemalloc_frame { PyObject *filename; unsigned int lineno; }; -#ifdef _MSC_VER -#pragma pack(pop) -#endif -struct tracemalloc_traceback { +struct +#ifdef __GNUC__ +__attribute__((packed)) +#endif +tracemalloc_traceback { Py_uhash_t hash; /* Number of frames stored */ uint16_t nframe; /* Total number of frames the traceback had */ uint16_t total_nframe; - struct tracemalloc_frame frames[1]; + struct tracemalloc_frame frames[]; }; +#ifdef _MSC_VER +#pragma pack(pop) +#endif struct _tracemalloc_runtime_state { @@ -95,7 +99,7 @@ struct _tracemalloc_runtime_state { Protected by TABLES_LOCK(). */ _Py_hashtable_t *domains; - struct tracemalloc_traceback empty_traceback; + struct tracemalloc_traceback *empty_traceback; Py_tss_t reentrant_key; }; diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index bad968428c7..50048801b2e 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -12,7 +12,8 @@ extern "C" { #include "pycore_freelist_state.h" // struct _Py_freelists #include "pycore_mimalloc.h" // struct _mimalloc_thread_state #include "pycore_qsbr.h" // struct qsbr - +#include "pycore_uop.h" // struct _PyUOpInstruction +#include "pycore_structs.h" #ifdef Py_GIL_DISABLED struct _gc_thread_state { @@ -21,6 +22,38 @@ struct _gc_thread_state { }; #endif +#if _Py_TIER2 +typedef struct _PyJitTracerInitialState { + int stack_depth; + int chain_depth; + struct _PyExitData *exit; + PyCodeObject *code; // Strong + PyFunctionObject *func; // Strong + _Py_CODEUNIT *start_instr; + _Py_CODEUNIT *close_loop_instr; + _Py_CODEUNIT *jump_backward_instr; +} _PyJitTracerInitialState; + +typedef struct _PyJitTracerPreviousState { + bool dependencies_still_valid; + bool instr_is_super; + int code_max_size; + int code_curr_size; + int instr_oparg; + int instr_stacklevel; + _Py_CODEUNIT *instr; + PyCodeObject *instr_code; // Strong + struct _PyInterpreterFrame *instr_frame; + _PyBloomFilter dependencies; +} _PyJitTracerPreviousState; + +typedef struct _PyJitTracerState { + _PyUOpInstruction *code_buffer; + _PyJitTracerInitialState initial_state; + _PyJitTracerPreviousState prev_state; +} _PyJitTracerState; +#endif + // Every PyThreadState is actually allocated as a _PyThreadStateImpl. The // PyThreadState fields are exposed as part of the C API, although most fields // are intended to be private. The _PyThreadStateImpl fields not exposed. @@ -37,6 +70,10 @@ typedef struct _PyThreadStateImpl { uintptr_t c_stack_soft_limit; uintptr_t c_stack_hard_limit; + // PyUnstable_ThreadState_ResetStackProtection() values + uintptr_t c_stack_init_base; + uintptr_t c_stack_init_top; + PyObject *asyncio_running_loop; // Strong reference PyObject *asyncio_running_task; // Strong reference @@ -70,12 +107,20 @@ typedef struct _PyThreadStateImpl { // When >1, code objects do not immortalize their non-string constants. int suppress_co_const_immortalization; + +#ifdef Py_STATS + // per-thread stats, will be merged into interp->pystats_struct + PyStats *pystats_struct; // allocated by _PyStats_ThreadInit() #endif +#endif // Py_GIL_DISABLED + #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) Py_ssize_t reftotal; // this thread's total refcount operations #endif - +#if _Py_TIER2 + _PyJitTracerState jit_tracer_state; +#endif } _PyThreadStateImpl; #ifdef __cplusplus diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index e7ca65a56b6..97dda73f9b5 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -307,14 +307,6 @@ PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray( Py_ssize_t seqlen ); -/* Test whether a unicode is equal to ASCII identifier. Return 1 if true, - 0 otherwise. The right argument must be ASCII identifier. - Any error occurs inside will be cleared before return. */ -extern int _PyUnicode_EqualToASCIIId( - PyObject *left, /* Left string */ - _Py_Identifier *right /* Right identifier */ - ); - // Test whether a unicode is equal to ASCII string. Return 1 if true, // 0 otherwise. The right argument must be ASCII-encoded string. // Any error occurs inside will be cleared before return. diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index fe527fae8f5..0b1bc279722 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1744,6 +1744,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(gc); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(generation); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2316,6 +2320,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(native); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ndigits); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2956,6 +2964,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(take_bytes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(target); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3248,6 +3260,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(gc); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_null); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3272,6 +3288,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(native); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_setcomp); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/internal/pycore_uop.h b/Include/internal/pycore_uop.h index 4abefd3b95d..70576046385 100644 --- a/Include/internal/pycore_uop.h +++ b/Include/internal/pycore_uop.h @@ -35,10 +35,23 @@ typedef struct _PyUOpInstruction{ #endif } _PyUOpInstruction; -// This is the length of the trace we project initially. -#define UOP_MAX_TRACE_LENGTH 1200 +// This is the length of the trace we translate initially. +#ifdef Py_DEBUG + // With asserts, the stencils are a lot larger +#define UOP_MAX_TRACE_LENGTH 1000 +#else +#define UOP_MAX_TRACE_LENGTH 3000 +#endif #define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction)) +/* Bloom filter with m = 256 + * https://en.wikipedia.org/wiki/Bloom_filter */ +#define _Py_BLOOM_FILTER_WORDS 8 + +typedef struct { + uint32_t bits[_Py_BLOOM_FILTER_WORDS]; +} _PyBloomFilter; + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index ff1d75c0cb1..c38f28f9db1 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -81,101 +81,107 @@ extern "C" { #define _CHECK_STACK_SPACE 357 #define _CHECK_STACK_SPACE_OPERAND 358 #define _CHECK_VALIDITY 359 -#define _COLD_EXIT 360 -#define _COMPARE_OP 361 -#define _COMPARE_OP_FLOAT 362 -#define _COMPARE_OP_INT 363 -#define _COMPARE_OP_STR 364 -#define _CONTAINS_OP 365 -#define _CONTAINS_OP_DICT 366 -#define _CONTAINS_OP_SET 367 +#define _COLD_DYNAMIC_EXIT 360 +#define _COLD_EXIT 361 +#define _COMPARE_OP 362 +#define _COMPARE_OP_FLOAT 363 +#define _COMPARE_OP_INT 364 +#define _COMPARE_OP_STR 365 +#define _CONTAINS_OP 366 +#define _CONTAINS_OP_DICT 367 +#define _CONTAINS_OP_SET 368 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY 368 -#define _COPY_1 369 -#define _COPY_2 370 -#define _COPY_3 371 +#define _COPY 369 +#define _COPY_1 370 +#define _COPY_2 371 +#define _COPY_3 372 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 372 +#define _CREATE_INIT_FRAME 373 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 373 +#define _DEOPT 374 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 374 -#define _DO_CALL_FUNCTION_EX 375 -#define _DO_CALL_KW 376 +#define _DO_CALL 375 +#define _DO_CALL_FUNCTION_EX 376 +#define _DO_CALL_KW 377 +#define _DYNAMIC_EXIT 378 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 377 +#define _ERROR_POP_N 379 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 378 -#define _EXPAND_METHOD_KW 379 -#define _FATAL_ERROR 380 +#define _EXPAND_METHOD 380 +#define _EXPAND_METHOD_KW 381 +#define _FATAL_ERROR 382 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 381 -#define _FOR_ITER_GEN_FRAME 382 -#define _FOR_ITER_TIER_TWO 383 +#define _FOR_ITER 383 +#define _FOR_ITER_GEN_FRAME 384 +#define _FOR_ITER_TIER_TWO 385 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 384 -#define _GUARD_CALLABLE_ISINSTANCE 385 -#define _GUARD_CALLABLE_LEN 386 -#define _GUARD_CALLABLE_LIST_APPEND 387 -#define _GUARD_CALLABLE_STR_1 388 -#define _GUARD_CALLABLE_TUPLE_1 389 -#define _GUARD_CALLABLE_TYPE_1 390 -#define _GUARD_DORV_NO_DICT 391 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 392 -#define _GUARD_GLOBALS_VERSION 393 -#define _GUARD_IS_FALSE_POP 394 -#define _GUARD_IS_NONE_POP 395 -#define _GUARD_IS_NOT_NONE_POP 396 -#define _GUARD_IS_TRUE_POP 397 -#define _GUARD_KEYS_VERSION 398 -#define _GUARD_NOS_DICT 399 -#define _GUARD_NOS_FLOAT 400 -#define _GUARD_NOS_INT 401 -#define _GUARD_NOS_LIST 402 -#define _GUARD_NOS_NOT_NULL 403 -#define _GUARD_NOS_NULL 404 -#define _GUARD_NOS_OVERFLOWED 405 -#define _GUARD_NOS_TUPLE 406 -#define _GUARD_NOS_UNICODE 407 -#define _GUARD_NOT_EXHAUSTED_LIST 408 -#define _GUARD_NOT_EXHAUSTED_RANGE 409 -#define _GUARD_NOT_EXHAUSTED_TUPLE 410 -#define _GUARD_THIRD_NULL 411 -#define _GUARD_TOS_ANY_SET 412 -#define _GUARD_TOS_DICT 413 -#define _GUARD_TOS_FLOAT 414 -#define _GUARD_TOS_INT 415 -#define _GUARD_TOS_LIST 416 -#define _GUARD_TOS_OVERFLOWED 417 -#define _GUARD_TOS_SLICE 418 -#define _GUARD_TOS_TUPLE 419 -#define _GUARD_TOS_UNICODE 420 -#define _GUARD_TYPE_VERSION 421 -#define _GUARD_TYPE_VERSION_AND_LOCK 422 -#define _HANDLE_PENDING_AND_DEOPT 423 +#define _GUARD_BINARY_OP_EXTEND 386 +#define _GUARD_CALLABLE_ISINSTANCE 387 +#define _GUARD_CALLABLE_LEN 388 +#define _GUARD_CALLABLE_LIST_APPEND 389 +#define _GUARD_CALLABLE_STR_1 390 +#define _GUARD_CALLABLE_TUPLE_1 391 +#define _GUARD_CALLABLE_TYPE_1 392 +#define _GUARD_DORV_NO_DICT 393 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 394 +#define _GUARD_GLOBALS_VERSION 395 +#define _GUARD_IP_RETURN_GENERATOR 396 +#define _GUARD_IP_RETURN_VALUE 397 +#define _GUARD_IP_YIELD_VALUE 398 +#define _GUARD_IP__PUSH_FRAME 399 +#define _GUARD_IS_FALSE_POP 400 +#define _GUARD_IS_NONE_POP 401 +#define _GUARD_IS_NOT_NONE_POP 402 +#define _GUARD_IS_TRUE_POP 403 +#define _GUARD_KEYS_VERSION 404 +#define _GUARD_NOS_DICT 405 +#define _GUARD_NOS_FLOAT 406 +#define _GUARD_NOS_INT 407 +#define _GUARD_NOS_LIST 408 +#define _GUARD_NOS_NOT_NULL 409 +#define _GUARD_NOS_NULL 410 +#define _GUARD_NOS_OVERFLOWED 411 +#define _GUARD_NOS_TUPLE 412 +#define _GUARD_NOS_UNICODE 413 +#define _GUARD_NOT_EXHAUSTED_LIST 414 +#define _GUARD_NOT_EXHAUSTED_RANGE 415 +#define _GUARD_NOT_EXHAUSTED_TUPLE 416 +#define _GUARD_THIRD_NULL 417 +#define _GUARD_TOS_ANY_SET 418 +#define _GUARD_TOS_DICT 419 +#define _GUARD_TOS_FLOAT 420 +#define _GUARD_TOS_INT 421 +#define _GUARD_TOS_LIST 422 +#define _GUARD_TOS_OVERFLOWED 423 +#define _GUARD_TOS_SLICE 424 +#define _GUARD_TOS_TUPLE 425 +#define _GUARD_TOS_UNICODE 426 +#define _GUARD_TYPE_VERSION 427 +#define _GUARD_TYPE_VERSION_AND_LOCK 428 +#define _HANDLE_PENDING_AND_DEOPT 429 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 424 -#define _INIT_CALL_PY_EXACT_ARGS 425 -#define _INIT_CALL_PY_EXACT_ARGS_0 426 -#define _INIT_CALL_PY_EXACT_ARGS_1 427 -#define _INIT_CALL_PY_EXACT_ARGS_2 428 -#define _INIT_CALL_PY_EXACT_ARGS_3 429 -#define _INIT_CALL_PY_EXACT_ARGS_4 430 -#define _INSERT_NULL 431 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 430 +#define _INIT_CALL_PY_EXACT_ARGS 431 +#define _INIT_CALL_PY_EXACT_ARGS_0 432 +#define _INIT_CALL_PY_EXACT_ARGS_1 433 +#define _INIT_CALL_PY_EXACT_ARGS_2 434 +#define _INIT_CALL_PY_EXACT_ARGS_3 435 +#define _INIT_CALL_PY_EXACT_ARGS_4 436 +#define _INSERT_NULL 437 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -185,177 +191,179 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 432 +#define _IS_NONE 438 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 433 -#define _ITER_CHECK_RANGE 434 -#define _ITER_CHECK_TUPLE 435 -#define _ITER_JUMP_LIST 436 -#define _ITER_JUMP_RANGE 437 -#define _ITER_JUMP_TUPLE 438 -#define _ITER_NEXT_LIST 439 -#define _ITER_NEXT_LIST_TIER_TWO 440 -#define _ITER_NEXT_RANGE 441 -#define _ITER_NEXT_TUPLE 442 -#define _JUMP_TO_TOP 443 +#define _ITER_CHECK_LIST 439 +#define _ITER_CHECK_RANGE 440 +#define _ITER_CHECK_TUPLE 441 +#define _ITER_JUMP_LIST 442 +#define _ITER_JUMP_RANGE 443 +#define _ITER_JUMP_TUPLE 444 +#define _ITER_NEXT_LIST 445 +#define _ITER_NEXT_LIST_TIER_TWO 446 +#define _ITER_NEXT_RANGE 447 +#define _ITER_NEXT_TUPLE 448 +#define _JUMP_BACKWARD_NO_INTERRUPT JUMP_BACKWARD_NO_INTERRUPT +#define _JUMP_TO_TOP 449 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 444 -#define _LOAD_ATTR_CLASS 445 +#define _LOAD_ATTR 450 +#define _LOAD_ATTR_CLASS 451 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 446 -#define _LOAD_ATTR_METHOD_LAZY_DICT 447 -#define _LOAD_ATTR_METHOD_NO_DICT 448 -#define _LOAD_ATTR_METHOD_WITH_VALUES 449 -#define _LOAD_ATTR_MODULE 450 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 451 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 452 -#define _LOAD_ATTR_PROPERTY_FRAME 453 -#define _LOAD_ATTR_SLOT 454 -#define _LOAD_ATTR_WITH_HINT 455 +#define _LOAD_ATTR_INSTANCE_VALUE 452 +#define _LOAD_ATTR_METHOD_LAZY_DICT 453 +#define _LOAD_ATTR_METHOD_NO_DICT 454 +#define _LOAD_ATTR_METHOD_WITH_VALUES 455 +#define _LOAD_ATTR_MODULE 456 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 457 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 458 +#define _LOAD_ATTR_PROPERTY_FRAME 459 +#define _LOAD_ATTR_SLOT 460 +#define _LOAD_ATTR_WITH_HINT 461 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 456 +#define _LOAD_BYTECODE 462 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 457 -#define _LOAD_CONST_INLINE_BORROW 458 -#define _LOAD_CONST_UNDER_INLINE 459 -#define _LOAD_CONST_UNDER_INLINE_BORROW 460 +#define _LOAD_CONST_INLINE 463 +#define _LOAD_CONST_INLINE_BORROW 464 +#define _LOAD_CONST_UNDER_INLINE 465 +#define _LOAD_CONST_UNDER_INLINE_BORROW 466 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 461 -#define _LOAD_FAST_0 462 -#define _LOAD_FAST_1 463 -#define _LOAD_FAST_2 464 -#define _LOAD_FAST_3 465 -#define _LOAD_FAST_4 466 -#define _LOAD_FAST_5 467 -#define _LOAD_FAST_6 468 -#define _LOAD_FAST_7 469 +#define _LOAD_FAST 467 +#define _LOAD_FAST_0 468 +#define _LOAD_FAST_1 469 +#define _LOAD_FAST_2 470 +#define _LOAD_FAST_3 471 +#define _LOAD_FAST_4 472 +#define _LOAD_FAST_5 473 +#define _LOAD_FAST_6 474 +#define _LOAD_FAST_7 475 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 470 -#define _LOAD_FAST_BORROW_0 471 -#define _LOAD_FAST_BORROW_1 472 -#define _LOAD_FAST_BORROW_2 473 -#define _LOAD_FAST_BORROW_3 474 -#define _LOAD_FAST_BORROW_4 475 -#define _LOAD_FAST_BORROW_5 476 -#define _LOAD_FAST_BORROW_6 477 -#define _LOAD_FAST_BORROW_7 478 +#define _LOAD_FAST_BORROW 476 +#define _LOAD_FAST_BORROW_0 477 +#define _LOAD_FAST_BORROW_1 478 +#define _LOAD_FAST_BORROW_2 479 +#define _LOAD_FAST_BORROW_3 480 +#define _LOAD_FAST_BORROW_4 481 +#define _LOAD_FAST_BORROW_5 482 +#define _LOAD_FAST_BORROW_6 483 +#define _LOAD_FAST_BORROW_7 484 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 479 -#define _LOAD_GLOBAL_BUILTINS 480 -#define _LOAD_GLOBAL_MODULE 481 +#define _LOAD_GLOBAL 485 +#define _LOAD_GLOBAL_BUILTINS 486 +#define _LOAD_GLOBAL_MODULE 487 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 482 -#define _LOAD_SMALL_INT_0 483 -#define _LOAD_SMALL_INT_1 484 -#define _LOAD_SMALL_INT_2 485 -#define _LOAD_SMALL_INT_3 486 -#define _LOAD_SPECIAL 487 +#define _LOAD_SMALL_INT 488 +#define _LOAD_SMALL_INT_0 489 +#define _LOAD_SMALL_INT_1 490 +#define _LOAD_SMALL_INT_2 491 +#define _LOAD_SMALL_INT_3 492 +#define _LOAD_SPECIAL 493 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 488 +#define _MAKE_CALLARGS_A_TUPLE 494 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 489 +#define _MAKE_WARM 495 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 490 -#define _MAYBE_EXPAND_METHOD_KW 491 -#define _MONITOR_CALL 492 -#define _MONITOR_CALL_KW 493 -#define _MONITOR_JUMP_BACKWARD 494 -#define _MONITOR_RESUME 495 +#define _MAYBE_EXPAND_METHOD 496 +#define _MAYBE_EXPAND_METHOD_KW 497 +#define _MONITOR_CALL 498 +#define _MONITOR_CALL_KW 499 +#define _MONITOR_JUMP_BACKWARD 500 +#define _MONITOR_RESUME 501 #define _NOP NOP -#define _POP_CALL 496 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 497 -#define _POP_CALL_ONE 498 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 499 -#define _POP_CALL_TWO 500 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 501 +#define _POP_CALL 502 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 503 +#define _POP_CALL_ONE 504 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 505 +#define _POP_CALL_TWO 506 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 507 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 502 -#define _POP_JUMP_IF_TRUE 503 +#define _POP_JUMP_IF_FALSE 508 +#define _POP_JUMP_IF_TRUE 509 #define _POP_TOP POP_TOP -#define _POP_TOP_FLOAT 504 -#define _POP_TOP_INT 505 -#define _POP_TOP_LOAD_CONST_INLINE 506 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 507 -#define _POP_TOP_NOP 508 -#define _POP_TOP_UNICODE 509 -#define _POP_TWO 510 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 511 +#define _POP_TOP_FLOAT 510 +#define _POP_TOP_INT 511 +#define _POP_TOP_LOAD_CONST_INLINE 512 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 513 +#define _POP_TOP_NOP 514 +#define _POP_TOP_UNICODE 515 +#define _POP_TWO 516 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 517 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 512 +#define _PUSH_FRAME 518 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 513 -#define _PY_FRAME_GENERAL 514 -#define _PY_FRAME_KW 515 -#define _QUICKEN_RESUME 516 -#define _REPLACE_WITH_TRUE 517 +#define _PUSH_NULL_CONDITIONAL 519 +#define _PY_FRAME_GENERAL 520 +#define _PY_FRAME_KW 521 +#define _QUICKEN_RESUME 522 +#define _REPLACE_WITH_TRUE 523 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 518 -#define _SEND 519 -#define _SEND_GEN_FRAME 520 +#define _SAVE_RETURN_OFFSET 524 +#define _SEND 525 +#define _SEND_GEN_FRAME 526 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 521 -#define _STORE_ATTR 522 -#define _STORE_ATTR_INSTANCE_VALUE 523 -#define _STORE_ATTR_SLOT 524 -#define _STORE_ATTR_WITH_HINT 525 +#define _START_EXECUTOR 527 +#define _STORE_ATTR 528 +#define _STORE_ATTR_INSTANCE_VALUE 529 +#define _STORE_ATTR_SLOT 530 +#define _STORE_ATTR_WITH_HINT 531 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 526 -#define _STORE_FAST_0 527 -#define _STORE_FAST_1 528 -#define _STORE_FAST_2 529 -#define _STORE_FAST_3 530 -#define _STORE_FAST_4 531 -#define _STORE_FAST_5 532 -#define _STORE_FAST_6 533 -#define _STORE_FAST_7 534 +#define _STORE_FAST 532 +#define _STORE_FAST_0 533 +#define _STORE_FAST_1 534 +#define _STORE_FAST_2 535 +#define _STORE_FAST_3 536 +#define _STORE_FAST_4 537 +#define _STORE_FAST_5 538 +#define _STORE_FAST_6 539 +#define _STORE_FAST_7 540 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 535 -#define _STORE_SUBSCR 536 -#define _STORE_SUBSCR_DICT 537 -#define _STORE_SUBSCR_LIST_INT 538 -#define _SWAP 539 -#define _SWAP_2 540 -#define _SWAP_3 541 -#define _TIER2_RESUME_CHECK 542 -#define _TO_BOOL 543 +#define _STORE_SLICE 541 +#define _STORE_SUBSCR 542 +#define _STORE_SUBSCR_DICT 543 +#define _STORE_SUBSCR_LIST_INT 544 +#define _SWAP 545 +#define _SWAP_2 546 +#define _SWAP_3 547 +#define _TIER2_RESUME_CHECK 548 +#define _TO_BOOL 549 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 544 +#define _TO_BOOL_LIST 550 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 545 +#define _TO_BOOL_STR 551 +#define _TRACE_RECORD TRACE_RECORD #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 546 -#define _UNPACK_SEQUENCE_LIST 547 -#define _UNPACK_SEQUENCE_TUPLE 548 -#define _UNPACK_SEQUENCE_TWO_TUPLE 549 +#define _UNPACK_SEQUENCE 552 +#define _UNPACK_SEQUENCE_LIST 553 +#define _UNPACK_SEQUENCE_TUPLE 554 +#define _UNPACK_SEQUENCE_TWO_TUPLE 555 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 549 +#define MAX_UOP_ID 555 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 12487719969..d5a3c362d87 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -11,7 +11,7 @@ extern "C" { #include #include "pycore_uop_ids.h" -extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1]; +extern const uint32_t _PyUop_Flags[MAX_UOP_ID+1]; typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange; extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1]; extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; @@ -19,7 +19,7 @@ extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; extern int _PyUop_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA -const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { +const uint32_t _PyUop_Flags[MAX_UOP_ID+1] = { [_NOP] = HAS_PURE_FLAG, [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -128,12 +128,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_RETURN_VALUE] = HAS_ESCAPES_FLAG, + [_RETURN_VALUE] = HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GET_ANEXT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SEND_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_YIELD_VALUE] = HAS_ARG_FLAG, + [_YIELD_VALUE] = HAS_ARG_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_POP_EXCEPT] = HAS_ESCAPES_FLAG, [_LOAD_COMMON_CONSTANT] = HAS_ARG_FLAG, [_LOAD_BUILD_CLASS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -256,7 +256,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, - [_PUSH_FRAME] = 0, + [_PUSH_FRAME] = HAS_NEEDS_GUARD_IP_FLAG, [_GUARD_NOS_NULL] = HAS_DEOPT_FLAG, [_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG, [_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG, @@ -293,7 +293,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_MAKE_CALLARGS_A_TUPLE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG, - [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_NEEDS_GUARD_IP_FLAG, [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -315,6 +315,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_ESCAPES_FLAG, + [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -336,7 +337,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_HANDLE_PENDING_AND_DEOPT] = HAS_ESCAPES_FLAG, [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_PERIODIC_FLAG, - [_COLD_EXIT] = HAS_ESCAPES_FLAG, + [_COLD_EXIT] = 0, + [_COLD_DYNAMIC_EXIT] = 0, + [_GUARD_IP__PUSH_FRAME] = HAS_EXIT_FLAG, + [_GUARD_IP_YIELD_VALUE] = HAS_EXIT_FLAG, + [_GUARD_IP_RETURN_VALUE] = HAS_EXIT_FLAG, + [_GUARD_IP_RETURN_GENERATOR] = HAS_EXIT_FLAG, }; const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { @@ -419,6 +425,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_COLD_DYNAMIC_EXIT] = "_COLD_DYNAMIC_EXIT", [_COLD_EXIT] = "_COLD_EXIT", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", @@ -443,6 +450,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_DEOPT] = "_DEOPT", [_DICT_MERGE] = "_DICT_MERGE", [_DICT_UPDATE] = "_DICT_UPDATE", + [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT", [_END_FOR] = "_END_FOR", [_END_SEND] = "_END_SEND", [_ERROR_POP_N] = "_ERROR_POP_N", @@ -471,6 +479,10 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", + [_GUARD_IP_RETURN_GENERATOR] = "_GUARD_IP_RETURN_GENERATOR", + [_GUARD_IP_RETURN_VALUE] = "_GUARD_IP_RETURN_VALUE", + [_GUARD_IP_YIELD_VALUE] = "_GUARD_IP_YIELD_VALUE", + [_GUARD_IP__PUSH_FRAME] = "_GUARD_IP__PUSH_FRAME", [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", @@ -1261,6 +1273,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _EXIT_TRACE: return 0; + case _DYNAMIC_EXIT: + return 0; case _CHECK_VALIDITY: return 0; case _LOAD_CONST_INLINE: @@ -1305,6 +1319,16 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _COLD_EXIT: return 0; + case _COLD_DYNAMIC_EXIT: + return 0; + case _GUARD_IP__PUSH_FRAME: + return 0; + case _GUARD_IP_YIELD_VALUE: + return 0; + case _GUARD_IP_RETURN_VALUE: + return 0; + case _GUARD_IP_RETURN_GENERATOR: + return 0; default: return -1; } diff --git a/Include/modsupport.h b/Include/modsupport.h index 094b9ff0e5c..cb47ad8cd27 100644 --- a/Include/modsupport.h +++ b/Include/modsupport.h @@ -132,7 +132,7 @@ PyAPI_FUNC(int) PyABIInfo_Check(PyABIInfo *info, const char *module_name); ) \ ///////////////////////////////////////////////////////// -#define _PyABIInfo_DEFAULT() { \ +#define _PyABIInfo_DEFAULT { \ 1, 0, \ PyABIInfo_DEFAULT_FLAGS, \ PY_VERSION_HEX, \ diff --git a/Include/moduleobject.h b/Include/moduleobject.h index e3afac0a343..e83bc395aa4 100644 --- a/Include/moduleobject.h +++ b/Include/moduleobject.h @@ -83,11 +83,19 @@ struct PyModuleDef_Slot { #endif #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) # define Py_mod_abi 5 +# define Py_mod_name 6 +# define Py_mod_doc 7 +# define Py_mod_state_size 8 +# define Py_mod_methods 9 +# define Py_mod_state_traverse 10 +# define Py_mod_state_clear 11 +# define Py_mod_state_free 12 +# define Py_mod_token 13 #endif #ifndef Py_LIMITED_API -#define _Py_mod_LAST_SLOT 5 +#define _Py_mod_LAST_SLOT 13 #endif #endif /* New in 3.5 */ @@ -109,6 +117,13 @@ struct PyModuleDef_Slot { PyAPI_FUNC(int) PyUnstable_Module_SetGIL(PyObject *module, void *gil); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) +PyAPI_FUNC(PyObject *) PyModule_FromSlotsAndSpec(const PyModuleDef_Slot *, + PyObject *spec); +PyAPI_FUNC(int) PyModule_Exec(PyObject *mod); +PyAPI_FUNC(int) PyModule_GetStateSize(PyObject *mod, Py_ssize_t *result); +PyAPI_FUNC(int) PyModule_GetToken(PyObject *, void **result); +#endif #ifndef _Py_OPAQUE_PYOBJECT struct PyModuleDef { diff --git a/Include/object.h b/Include/object.h index 291e4f0a7ed..ad452be8405 100644 --- a/Include/object.h +++ b/Include/object.h @@ -140,12 +140,12 @@ struct _object { # endif }; #else - Py_ssize_t ob_refcnt; + Py_ssize_t ob_refcnt; // part of stable ABI; do not change #endif _Py_ALIGNED_DEF(_PyObject_MIN_ALIGNMENT, char) _aligner; }; - PyTypeObject *ob_type; + PyTypeObject *ob_type; // part of stable ABI; do not change }; #else // Objects that are not owned by any thread use a thread id (tid) of zero. @@ -173,7 +173,7 @@ struct _object { #ifndef _Py_OPAQUE_PYOBJECT struct PyVarObject { PyObject ob_base; - Py_ssize_t ob_size; /* Number of items in variable part */ + Py_ssize_t ob_size; // Number of items in variable part. Part of stable ABI }; #endif typedef struct PyVarObject PyVarObject; @@ -265,56 +265,72 @@ _Py_IsOwnedByCurrentThread(PyObject *ob) } #endif -// Py_TYPE() implementation for the stable ABI -PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); - -#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030e0000 - // Stable ABI implements Py_TYPE() as a function call - // on limited C API version 3.14 and newer. -#else - static inline PyTypeObject* _Py_TYPE(PyObject *ob) - { - return ob->ob_type; - } - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 - # define Py_TYPE(ob) _Py_TYPE(_PyObject_CAST(ob)) - #else - # define Py_TYPE(ob) _Py_TYPE(ob) - #endif -#endif - PyAPI_DATA(PyTypeObject) PyLong_Type; PyAPI_DATA(PyTypeObject) PyBool_Type; +/* Definitions for the stable ABI */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 14) +PyAPI_FUNC(PyTypeObject*) Py_TYPE(PyObject *ob); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) +PyAPI_FUNC(Py_ssize_t) Py_SIZE(PyObject *ob); +PyAPI_FUNC(int) Py_IS_TYPE(PyObject *ob, PyTypeObject *type); +PyAPI_FUNC(void) Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size); +#endif + #ifndef _Py_OPAQUE_PYOBJECT + +static inline void +Py_SET_TYPE(PyObject *ob, PyTypeObject *type) +{ + ob->ob_type = type; +} + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 11) +// Non-limited API & limited API 3.11 & below: use static inline functions and +// use _PyObject_CAST so that users don't need their own casts +# define Py_TYPE(ob) _Py_TYPE_impl(_PyObject_CAST(ob)) +# define Py_SIZE(ob) _Py_SIZE_impl(_PyObject_CAST(ob)) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl(_PyObject_CAST(ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl(_PyVarObject_CAST(ob), (size)) +# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) +#elif Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 15) +// Limited API 3.11-3.14: use static inline functions, without casts +# define Py_SIZE(ob) _Py_SIZE_impl(ob) +# define Py_IS_TYPE(ob, type) _Py_IS_TYPE_impl((ob), (type)) +# define Py_SET_SIZE(ob, size) _Py_SET_SIZE_impl((ob), (size)) +# if Py_LIMITED_API+0 < _Py_PACK_VERSION(3, 14) +// Py_TYPE() is static inline only on Limited API 3.13 and below +# define Py_TYPE(ob) _Py_TYPE_impl(ob) +# endif +#else +// Limited API 3.15+: use function calls +#endif + +static inline +PyTypeObject* _Py_TYPE_impl(PyObject *ob) +{ + return ob->ob_type; +} + // bpo-39573: The Py_SET_SIZE() function must be used to set an object size. -static inline Py_ssize_t Py_SIZE(PyObject *ob) { +static inline Py_ssize_t +_Py_SIZE_impl(PyObject *ob) +{ assert(Py_TYPE(ob) != &PyLong_Type); assert(Py_TYPE(ob) != &PyBool_Type); return _PyVarObject_CAST(ob)->ob_size; } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob)) -#endif -#endif // !defined(_Py_OPAQUE_PYOBJECT) -static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { +static inline int +_Py_IS_TYPE_impl(PyObject *ob, PyTypeObject *type) +{ return Py_TYPE(ob) == type; } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_IS_TYPE(ob, type) Py_IS_TYPE(_PyObject_CAST(ob), (type)) -#endif - -#ifndef _Py_OPAQUE_PYOBJECT -static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { - ob->ob_type = type; -} -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) -#endif - -static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { +static inline void +_Py_SET_SIZE_impl(PyVarObject *ob, Py_ssize_t size) +{ assert(Py_TYPE(_PyObject_CAST(ob)) != &PyLong_Type); assert(Py_TYPE(_PyObject_CAST(ob)) != &PyBool_Type); #ifdef Py_GIL_DISABLED @@ -323,9 +339,7 @@ static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { ob->ob_size = size; #endif } -#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 -# define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) -#endif + #endif // !defined(_Py_OPAQUE_PYOBJECT) @@ -839,6 +853,11 @@ PyAPI_FUNC(PyObject *) PyType_GetModuleByDef(PyTypeObject *, PyModuleDef *); PyAPI_FUNC(int) PyType_Freeze(PyTypeObject *type); #endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= _Py_PACK_VERSION(3, 15) +PyAPI_FUNC(PyObject *) PyType_GetModuleByToken(PyTypeObject *type, + const void *token); +#endif + #ifdef __cplusplus } #endif diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index 1d5c74adefc..0d066c16901 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -213,28 +213,29 @@ extern "C" { #define UNPACK_SEQUENCE_LIST 207 #define UNPACK_SEQUENCE_TUPLE 208 #define UNPACK_SEQUENCE_TWO_TUPLE 209 -#define INSTRUMENTED_END_FOR 234 -#define INSTRUMENTED_POP_ITER 235 -#define INSTRUMENTED_END_SEND 236 -#define INSTRUMENTED_FOR_ITER 237 -#define INSTRUMENTED_INSTRUCTION 238 -#define INSTRUMENTED_JUMP_FORWARD 239 -#define INSTRUMENTED_NOT_TAKEN 240 -#define INSTRUMENTED_POP_JUMP_IF_TRUE 241 -#define INSTRUMENTED_POP_JUMP_IF_FALSE 242 -#define INSTRUMENTED_POP_JUMP_IF_NONE 243 -#define INSTRUMENTED_POP_JUMP_IF_NOT_NONE 244 -#define INSTRUMENTED_RESUME 245 -#define INSTRUMENTED_RETURN_VALUE 246 -#define INSTRUMENTED_YIELD_VALUE 247 -#define INSTRUMENTED_END_ASYNC_FOR 248 -#define INSTRUMENTED_LOAD_SUPER_ATTR 249 -#define INSTRUMENTED_CALL 250 -#define INSTRUMENTED_CALL_KW 251 -#define INSTRUMENTED_CALL_FUNCTION_EX 252 -#define INSTRUMENTED_JUMP_BACKWARD 253 -#define INSTRUMENTED_LINE 254 -#define ENTER_EXECUTOR 255 +#define INSTRUMENTED_END_FOR 233 +#define INSTRUMENTED_POP_ITER 234 +#define INSTRUMENTED_END_SEND 235 +#define INSTRUMENTED_FOR_ITER 236 +#define INSTRUMENTED_INSTRUCTION 237 +#define INSTRUMENTED_JUMP_FORWARD 238 +#define INSTRUMENTED_NOT_TAKEN 239 +#define INSTRUMENTED_POP_JUMP_IF_TRUE 240 +#define INSTRUMENTED_POP_JUMP_IF_FALSE 241 +#define INSTRUMENTED_POP_JUMP_IF_NONE 242 +#define INSTRUMENTED_POP_JUMP_IF_NOT_NONE 243 +#define INSTRUMENTED_RESUME 244 +#define INSTRUMENTED_RETURN_VALUE 245 +#define INSTRUMENTED_YIELD_VALUE 246 +#define INSTRUMENTED_END_ASYNC_FOR 247 +#define INSTRUMENTED_LOAD_SUPER_ATTR 248 +#define INSTRUMENTED_CALL 249 +#define INSTRUMENTED_CALL_KW 250 +#define INSTRUMENTED_CALL_FUNCTION_EX 251 +#define INSTRUMENTED_JUMP_BACKWARD 252 +#define INSTRUMENTED_LINE 253 +#define ENTER_EXECUTOR 254 +#define TRACE_RECORD 255 #define ANNOTATIONS_PLACEHOLDER 256 #define JUMP 257 #define JUMP_IF_FALSE 258 @@ -249,7 +250,7 @@ extern "C" { #define HAVE_ARGUMENT 43 #define MIN_SPECIALIZED_OPCODE 129 -#define MIN_INSTRUMENTED_OPCODE 234 +#define MIN_INSTRUMENTED_OPCODE 233 #ifdef __cplusplus } diff --git a/Include/patchlevel.h b/Include/patchlevel.h index e3996ee8679..804aa1a0427 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -24,10 +24,10 @@ #define PY_MINOR_VERSION 15 #define PY_MICRO_VERSION 0 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_ALPHA -#define PY_RELEASE_SERIAL 1 +#define PY_RELEASE_SERIAL 2 /* Version as a string */ -#define PY_VERSION "3.15.0a1+" +#define PY_VERSION "3.15.0a2+" /*--end constants--*/ diff --git a/Include/pymacro.h b/Include/pymacro.h index 857cdf12db9..7ecce44a0d2 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -116,6 +116,12 @@ /* Absolute value of the number x */ #define Py_ABS(x) ((x) < 0 ? -(x) : (x)) +/* Safer implementation that avoids an undefined behavior for the minimal + value of the signed integer type if its absolute value is larger than + the maximal value of the signed integer type (in the two's complement + representations, which is common). + */ +#define _Py_ABS_CAST(T, x) ((x) >= 0 ? ((T) (x)) : ((T) (((T) -((x) + 1)) + 1u))) #define _Py_XSTRINGIFY(x) #x diff --git a/Include/pymath.h b/Include/pymath.h index e2919c7b527..7cfe441365d 100644 --- a/Include/pymath.h +++ b/Include/pymath.h @@ -7,6 +7,7 @@ /* High precision definition of pi and e (Euler) * The values are taken from libc6's math.h. */ +// Deprecated since Python 3.15. #ifndef Py_MATH_PIl #define Py_MATH_PIl 3.1415926535897932384626433832795029L #endif @@ -14,6 +15,7 @@ #define Py_MATH_PI 3.14159265358979323846 #endif +// Deprecated since Python 3.15. #ifndef Py_MATH_El #define Py_MATH_El 2.7182818284590452353602874713526625L #endif @@ -43,13 +45,14 @@ #define Py_IS_FINITE(X) isfinite(X) // Py_INFINITY: Value that evaluates to a positive double infinity. +// Soft deprecated since Python 3.15, use INFINITY instead. #ifndef Py_INFINITY # define Py_INFINITY ((double)INFINITY) #endif /* Py_HUGE_VAL should always be the same as Py_INFINITY. But historically * this was not reliable and Python did not require IEEE floats and C99 - * conformity. The macro was soft deprecated in Python 3.14, use Py_INFINITY instead. + * conformity. The macro was soft deprecated in Python 3.14, use INFINITY instead. */ #ifndef Py_HUGE_VAL # define Py_HUGE_VAL HUGE_VAL diff --git a/Include/pyport.h b/Include/pyport.h index e77b39026a5..61e2317976e 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -504,13 +504,20 @@ extern "C" { * Thread support is stubbed and any attempt to create a new thread fails. */ #if (!defined(HAVE_PTHREAD_STUBS) && \ + !defined(__wasi__) && \ (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))) # define Py_CAN_START_THREADS 1 #endif -#ifdef WITH_THREAD -// HAVE_THREAD_LOCAL is just defined here for compatibility's sake + +/* gh-142163: Some libraries rely on HAVE_THREAD_LOCAL being undefined, so + * we can only define it only when Py_BUILD_CORE is set.*/ +#ifdef Py_BUILD_CORE +// This is no longer coupled to _Py_thread_local. # define HAVE_THREAD_LOCAL 1 +#endif + +#ifdef WITH_THREAD # ifdef thread_local # define _Py_thread_local thread_local # elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) @@ -677,4 +684,10 @@ extern "C" { #endif +// Assume the stack grows down unless specified otherwise +#ifndef _Py_STACK_GROWS_DOWN +# define _Py_STACK_GROWS_DOWN 1 +#endif + + #endif /* Py_PYPORT_H */ diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md index 09585380737..1740b22b85f 100644 --- a/InternalDocs/jit.md +++ b/InternalDocs/jit.md @@ -53,7 +53,7 @@ ## The micro-op optimizer ## The JIT interpreter After a `JUMP_BACKWARD` instruction invokes the uop optimizer to create a uop -executor, it transfers control to this executor via the `GOTO_TIER_TWO` macro. +executor, it transfers control to this executor via the `TIER1_TO_TIER2` macro. CPython implements two executors. Here we describe the JIT interpreter, which is the simpler of them and is therefore useful for debugging and analyzing diff --git a/InternalDocs/stack_protection.md b/InternalDocs/stack_protection.md index fa025bd930f..14802e57d09 100644 --- a/InternalDocs/stack_protection.md +++ b/InternalDocs/stack_protection.md @@ -38,12 +38,19 @@ # Stack Protection ```python kb_used = (stack_top - stack_pointer)>>10 -if stack_pointer < hard_limit: +if stack_pointer < bottom_of_machine_stack: + pass # Our stack limits could be wrong so it is safest to do nothing. +elif stack_pointer < hard_limit: FatalError(f"Unrecoverable stack overflow (used {kb_used} kB)") elif stack_pointer < soft_limit: raise RecursionError(f"Stack overflow (used {kb_used} kB)") ``` +### User space threads and other oddities + +Some libraries provide user-space threads. These will change the C stack at runtime. +To guard against this we only raise if the stack pointer is in the window between the expected stack base and the soft limit. + ### Diagnosing and fixing stack overflows For stack protection to work correctly the amount of stack consumed between calls to `_Py_EnterRecursiveCall()` must be less than `_PyOS_STACK_MARGIN_BYTES`. diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 63e951d6488..29d7cc67b6e 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -1,4 +1,3 @@ -import io import os import sys @@ -169,7 +168,12 @@ class Argparse(ThemeSection): short_option: str = ANSIColors.BOLD_GREEN label: str = ANSIColors.BOLD_YELLOW action: str = ANSIColors.BOLD_GREEN + default: str = ANSIColors.GREY + default_value: str = ANSIColors.YELLOW reset: str = ANSIColors.RESET + error: str = ANSIColors.BOLD_MAGENTA + warning: str = ANSIColors.BOLD_YELLOW + message: str = ANSIColors.MAGENTA @dataclass(frozen=True, kw_only=True) @@ -327,7 +331,7 @@ def _safe_getenv(k: str, fallback: str | None = None) -> str | None: try: return os.isatty(file.fileno()) - except io.UnsupportedOperation: + except OSError: return hasattr(file, "isatty") and file.isatty() diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index f168d169a32..e681cb17e43 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -208,8 +208,9 @@ opmap = { 'CACHE': 0, 'RESERVED': 17, 'RESUME': 128, - 'INSTRUMENTED_LINE': 254, - 'ENTER_EXECUTOR': 255, + 'INSTRUMENTED_LINE': 253, + 'ENTER_EXECUTOR': 254, + 'TRACE_RECORD': 255, 'BINARY_SLICE': 1, 'BUILD_TEMPLATE': 2, 'CALL_FUNCTION_EX': 4, @@ -328,26 +329,26 @@ opmap = { 'UNPACK_EX': 118, 'UNPACK_SEQUENCE': 119, 'YIELD_VALUE': 120, - 'INSTRUMENTED_END_FOR': 234, - 'INSTRUMENTED_POP_ITER': 235, - 'INSTRUMENTED_END_SEND': 236, - 'INSTRUMENTED_FOR_ITER': 237, - 'INSTRUMENTED_INSTRUCTION': 238, - 'INSTRUMENTED_JUMP_FORWARD': 239, - 'INSTRUMENTED_NOT_TAKEN': 240, - 'INSTRUMENTED_POP_JUMP_IF_TRUE': 241, - 'INSTRUMENTED_POP_JUMP_IF_FALSE': 242, - 'INSTRUMENTED_POP_JUMP_IF_NONE': 243, - 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 244, - 'INSTRUMENTED_RESUME': 245, - 'INSTRUMENTED_RETURN_VALUE': 246, - 'INSTRUMENTED_YIELD_VALUE': 247, - 'INSTRUMENTED_END_ASYNC_FOR': 248, - 'INSTRUMENTED_LOAD_SUPER_ATTR': 249, - 'INSTRUMENTED_CALL': 250, - 'INSTRUMENTED_CALL_KW': 251, - 'INSTRUMENTED_CALL_FUNCTION_EX': 252, - 'INSTRUMENTED_JUMP_BACKWARD': 253, + 'INSTRUMENTED_END_FOR': 233, + 'INSTRUMENTED_POP_ITER': 234, + 'INSTRUMENTED_END_SEND': 235, + 'INSTRUMENTED_FOR_ITER': 236, + 'INSTRUMENTED_INSTRUCTION': 237, + 'INSTRUMENTED_JUMP_FORWARD': 238, + 'INSTRUMENTED_NOT_TAKEN': 239, + 'INSTRUMENTED_POP_JUMP_IF_TRUE': 240, + 'INSTRUMENTED_POP_JUMP_IF_FALSE': 241, + 'INSTRUMENTED_POP_JUMP_IF_NONE': 242, + 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 243, + 'INSTRUMENTED_RESUME': 244, + 'INSTRUMENTED_RETURN_VALUE': 245, + 'INSTRUMENTED_YIELD_VALUE': 246, + 'INSTRUMENTED_END_ASYNC_FOR': 247, + 'INSTRUMENTED_LOAD_SUPER_ATTR': 248, + 'INSTRUMENTED_CALL': 249, + 'INSTRUMENTED_CALL_KW': 250, + 'INSTRUMENTED_CALL_FUNCTION_EX': 251, + 'INSTRUMENTED_JUMP_BACKWARD': 252, 'ANNOTATIONS_PLACEHOLDER': 256, 'JUMP': 257, 'JUMP_IF_FALSE': 258, @@ -362,4 +363,4 @@ opmap = { } HAVE_ARGUMENT = 43 -MIN_INSTRUMENTED_OPCODE = 234 +MIN_INSTRUMENTED_OPCODE = 233 diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py index 91a9f44b201..67c74fdd2d0 100644 --- a/Lib/_py_warnings.py +++ b/Lib/_py_warnings.py @@ -646,6 +646,9 @@ def __str__(self): "line : %r}" % (self.message, self._category_name, self.filename, self.lineno, self.line)) + def __repr__(self): + return f'<{type(self).__qualname__} {self}>' + class catch_warnings(object): diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 423178e87a8..69a088df8fc 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -546,7 +546,7 @@ def nreadahead(): res += b if res.endswith(b"\n"): break - return bytes(res) + return res.take_bytes() def __iter__(self): self._checkClosed() @@ -620,7 +620,7 @@ def read(self, size=-1): if n < 0 or n > len(b): raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] - return bytes(b) + return b.take_bytes() def readall(self): """Read until EOF, using multiple read() call.""" @@ -628,7 +628,7 @@ def readall(self): while data := self.read(DEFAULT_BUFFER_SIZE): res += data if res: - return bytes(res) + return res.take_bytes() else: # b'' or None return data @@ -1738,7 +1738,7 @@ def readall(self): assert len(result) - bytes_read >= 1, \ "os.readinto buffer size 0 will result in erroneous EOF / returns 0" result.resize(bytes_read) - return bytes(result) + return result.take_bytes() def readinto(self, buffer): """Same as RawIOBase.readinto().""" diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index ff1bdab9fea..3b0debf2ba0 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -31,7 +31,6 @@ import sys import code import warnings -import errno from .readline import _get_reader, multiline_input, append_history_file diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index c56dcd6d7dd..f9f5988af0b 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -249,22 +249,10 @@ def input_hook(self): def __write_changed_line( self, y: int, oldline: str, newline: str, px_coord: int ) -> None: - # this is frustrating; there's no reason to test (say) - # self.dch1 inside the loop -- but alternative ways of - # structuring this function are equally painful (I'm trying to - # avoid writing code generators these days...) minlen = min(wlen(oldline), wlen(newline)) x_pos = 0 x_coord = 0 - px_pos = 0 - j = 0 - for c in oldline: - if j >= px_coord: - break - j += wlen(c) - px_pos += 1 - # reuse the oldline as much as possible, but stop as soon as we # encounter an ESCAPE, because it might be the start of an escape # sequence @@ -358,7 +346,6 @@ def prepare(self) -> None: self.height, self.width = self.getheightwidth() self.posxy = 0, 0 - self.__gone_tall = 0 self.__offset = 0 if self.__vt_support: diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index 2166dbff0ee..a5788cdbfae 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -150,33 +150,42 @@ def evaluate( if globals is None: globals = {} + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + if locals is None: locals = {} if isinstance(owner, type): locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) - if type_params is None and owner is not None: - # "Inject" type parameters into the local namespace - # (unless they are shadowed by assignments *in* the local namespace), - # as a way of emulating annotation scopes when calling `eval()` - type_params = getattr(owner, "__type_params__", None) - - # Type parameters exist in their own scope, which is logically - # between the locals and the globals. We simulate this by adding - # them to the globals. Similar reasoning applies to nonlocals stored in cells. - if type_params is not None or isinstance(self.__cell__, dict): - globals = dict(globals) + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` if type_params is not None: for param in type_params: - globals[param.__name__] = param + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. if isinstance(self.__cell__, dict): - for cell_name, cell_value in self.__cell__.items(): + for cell_name, cell in self.__cell__.items(): try: - globals[cell_name] = cell_value.cell_contents + cell_value = cell.cell_contents except ValueError: pass + else: + locals.setdefault(cell_name, cell_value) + if self.__extra_names__: - locals = {**locals, **self.__extra_names__} + locals.update(self.__extra_names__) arg = self.__forward_arg__ if arg.isidentifier() and not keyword.iskeyword(arg): @@ -835,14 +844,9 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): if not annotate.__closure__: return None, None - freevars = annotate.__code__.co_freevars new_closure = [] cell_dict = {} - for i, cell in enumerate(annotate.__closure__): - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): cell_dict[name] = cell new_cell = None if allow_evaluation: diff --git a/Lib/argparse.py b/Lib/argparse.py index 1f4413a9897..07d7d77e884 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -166,7 +166,6 @@ def __init__( indent_increment=2, max_help_position=24, width=None, - color=True, ): # default setting for width if width is None: @@ -174,7 +173,6 @@ def __init__( width = shutil.get_terminal_size().columns width -= 2 - self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -355,8 +353,14 @@ def _format_usage(self, usage, actions, groups, prefix): if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - opt_parts = self._get_actions_usage_parts(optionals, groups) - pos_parts = self._get_actions_usage_parts(positionals, groups) + # keep optionals and positionals together to preserve + # mutually exclusive group formatting (gh-75949) + all_actions = optionals + positionals + parts, pos_start = self._get_actions_usage_parts_with_split( + all_actions, groups, len(optionals) + ) + opt_parts = parts[:pos_start] + pos_parts = parts[pos_start:] # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -420,6 +424,17 @@ def _is_long_option(self, string): return len(string) > 2 def _get_actions_usage_parts(self, actions, groups): + parts, _ = self._get_actions_usage_parts_with_split(actions, groups) + return parts + + def _get_actions_usage_parts_with_split(self, actions, groups, opt_count=None): + """Get usage parts with split index for optionals/positionals. + + Returns (parts, pos_start) where pos_start is the index in parts + where positionals begin. When opt_count is None, pos_start is None. + This preserves mutually exclusive group formatting across the + optionals/positionals boundary (gh-75949). + """ # find group indices and identify actions in groups group_actions = set() inserts = {} @@ -515,8 +530,16 @@ def _get_actions_usage_parts(self, actions, groups): for i in range(start + group_size, end): parts[i] = None - # return the usage parts - return [item for item in parts if item is not None] + # if opt_count is provided, calculate where positionals start in + # the final parts list (for wrapping onto separate lines). + # Count before filtering None entries since indices shift after. + if opt_count is not None: + pos_start = sum(1 for p in parts[:opt_count] if p is not None) + else: + pos_start = None + + # return the usage parts and split point (gh-75949) + return [item for item in parts if item is not None], pos_start def _format_text(self, text): if '%(prog)' in text: @@ -748,7 +771,14 @@ def _get_help_string(self, action): if action.default is not SUPPRESS: defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] if action.option_strings or action.nargs in defaulting_nargs: - help += _(' (default: %(default)s)') + t = self._theme + default_str = _(" (default: %(default)s)") + prefix, suffix = default_str.split("%(default)s") + help += ( + f" {t.default}{prefix.lstrip()}" + f"{t.default_value}%(default)s" + f"{t.default}{suffix}{t.reset}" + ) return help @@ -932,15 +962,26 @@ def __init__(self, deprecated=False): _option_strings = [] + neg_option_strings = [] for option_string in option_strings: _option_strings.append(option_string) - if option_string.startswith('--'): - if option_string.startswith('--no-'): + if len(option_string) > 2 and option_string[0] == option_string[1]: + # two-dash long option: '--foo' -> '--no-foo' + if option_string.startswith('no-', 2): raise ValueError(f'invalid option name {option_string!r} ' f'for BooleanOptionalAction') - option_string = '--no-' + option_string[2:] + option_string = option_string[:2] + 'no-' + option_string[2:] _option_strings.append(option_string) + neg_option_strings.append(option_string) + elif len(option_string) > 2 and option_string[0] != option_string[1]: + # single-dash long option: '-foo' -> '-nofoo' + if option_string.startswith('no', 1): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') + option_string = option_string[:1] + 'no' + option_string[1:] + _option_strings.append(option_string) + neg_option_strings.append(option_string) super().__init__( option_strings=_option_strings, @@ -950,11 +991,12 @@ def __init__(self, required=required, help=help, deprecated=deprecated) + self.neg_option_strings = neg_option_strings def __call__(self, parser, namespace, values, option_string=None): if option_string in self.option_strings: - setattr(namespace, self.dest, not option_string.startswith('--no-')) + setattr(namespace, self.dest, option_string not in self.neg_option_strings) def format_usage(self): return ' | '.join(self.option_strings) @@ -1551,8 +1593,8 @@ def add_argument(self, *args, **kwargs): f'instance of it must be passed') # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._format_args(action, None) except TypeError: @@ -1660,29 +1702,35 @@ def _get_positional_kwargs(self, dest, **kwargs): def _get_optional_kwargs(self, *args, **kwargs): # determine short and long option strings option_strings = [] - long_option_strings = [] for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: raise ValueError( f'invalid option string {option_string!r}: ' f'must start with a character {self.prefix_chars!r}') - - # strings starting with two prefix characters are long options option_strings.append(option_string) - if len(option_string) > 1 and option_string[1] in self.prefix_chars: - long_option_strings.append(option_string) # infer destination, '--foo-bar' -> 'foo_bar' and '-x' -> 'x' dest = kwargs.pop('dest', None) if dest is None: - if long_option_strings: - dest_option_string = long_option_strings[0] - else: - dest_option_string = option_strings[0] - dest = dest_option_string.lstrip(self.prefix_chars) + priority = 0 + for option_string in option_strings: + if len(option_string) <= 2: + # short option: '-x' -> 'x' + if priority < 1: + dest = option_string.lstrip(self.prefix_chars) + priority = 1 + elif option_string[1] not in self.prefix_chars: + # single-dash long option: '-foo' -> 'foo' + if priority < 2: + dest = option_string.lstrip(self.prefix_chars) + priority = 2 + else: + # two-dash long option: '--foo' -> 'foo' + dest = option_string.lstrip(self.prefix_chars) + break if not dest: - msg = f'dest= is required for options like {option_string!r}' + msg = f'dest= is required for options like {repr(option_strings)[1:-1]}' raise TypeError(msg) dest = dest.replace('-', '_') @@ -1740,8 +1788,8 @@ def _handle_conflict_resolve(self, action, conflicting_actions): action.container._remove_action(action) def _check_help(self, action): - if action.help and hasattr(self, "_get_formatter"): - formatter = self._get_formatter() + if action.help and hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: formatter._expand_help(action) except (ValueError, TypeError, KeyError) as exc: @@ -1896,6 +1944,9 @@ def __init__(self, self.suggest_on_error = suggest_on_error self.color = color + # Cached formatter for validation (avoids repeated _set_color calls) + self._cached_formatter = None + add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) self._optionals = add_group(_('options')) @@ -2727,6 +2778,13 @@ def _get_formatter(self): formatter._set_color(self.color) return formatter + def _get_validation_formatter(self): + # Return cached formatter for read-only validation operations + # (_expand_help and _format_args). Avoids repeated slow _set_color calls. + if self._cached_formatter is None: + self._cached_formatter = self._get_formatter() + return self._cached_formatter + # ===================== # Help-printing methods # ===================== @@ -2749,6 +2807,14 @@ def _print_message(self, message, file=None): except (AttributeError, OSError): pass + def _get_theme(self, file=None): + from _colorize import can_colorize, get_theme + + if self.color and can_colorize(file=file): + return get_theme(force_color=True).argparse + else: + return get_theme(force_no_color=True).argparse + # =============== # Exiting methods # =============== @@ -2768,13 +2834,21 @@ def error(self, message): should either exit or raise an exception. """ self.print_usage(_sys.stderr) + theme = self._get_theme(file=_sys.stderr) + fmt = _('%(prog)s: error: %(message)s\n') + fmt = fmt.replace('error: %(message)s', + f'{theme.error}error:{theme.reset} {theme.message}%(message)s{theme.reset}') + args = {'prog': self.prog, 'message': message} - self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + self.exit(2, fmt % args) def _warning(self, message): + theme = self._get_theme(file=_sys.stderr) + fmt = _('%(prog)s: warning: %(message)s\n') + fmt = fmt.replace('warning: %(message)s', + f'{theme.warning}warning:{theme.reset} {theme.message}%(message)s{theme.reset}') args = {'prog': self.prog, 'message': message} - self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) - + self._print_message(fmt % args, _sys.stderr) def __getattr__(name): if name == "__version__": diff --git a/Lib/ast.py b/Lib/ast.py index 983ac1710d0..d9743ba7ab4 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -24,7 +24,7 @@ def parse(source, filename='', mode='exec', *, - type_comments=False, feature_version=None, optimize=-1): + type_comments=False, feature_version=None, optimize=-1, module=None): """ Parse the source into an AST node. Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). @@ -44,7 +44,8 @@ def parse(source, filename='', mode='exec', *, feature_version = minor # Else it should be an int giving the minor version for 3.x. return compile(source, filename, mode, flags, - _feature_version=feature_version, optimize=optimize) + _feature_version=feature_version, optimize=optimize, + module=module) def literal_eval(node_or_string): diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index d40af422e61..321a4e5d5d1 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -26,6 +26,7 @@ def __init__(self, loop, protocol, args, shell, self._pending_calls = collections.deque() self._pipes = {} self._finished = False + self._pipes_connected = False if stdin == subprocess.PIPE: self._pipes[0] = None @@ -213,6 +214,7 @@ async def _connect_pipes(self, waiter): else: if waiter is not None and not waiter.cancelled(): waiter.set_result(None) + self._pipes_connected = True def _call(self, cb, *data): if self._pending_calls is not None: @@ -256,6 +258,15 @@ def _try_finish(self): assert not self._finished if self._returncode is None: return + if not self._pipes_connected: + # self._pipes_connected can be False if not all pipes were connected + # because either the process failed to start or the self._connect_pipes task + # got cancelled. In this broken state we consider all pipes disconnected and + # to avoid hanging forever in self._wait as otherwise _exit_waiters + # would never be woken up, we wake them up here. + for waiter in self._exit_waiters: + if not waiter.cancelled(): + waiter.set_result(self._returncode) if all(p is not None and p.disconnected for p in self._pipes.values()): self._finished = True diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index 59e22f523a8..d2db1a930c2 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -667,8 +667,7 @@ async def readuntil(self, separator=b'\n'): # adds data which makes separator be found. That's why we check for # EOF *after* inspecting the buffer. if self._eof: - chunk = bytes(self._buffer) - self._buffer.clear() + chunk = self._buffer.take_bytes() raise exceptions.IncompleteReadError(chunk, None) # _wait_for_data() will resume reading if stream was paused. @@ -678,10 +677,9 @@ async def readuntil(self, separator=b'\n'): raise exceptions.LimitOverrunError( 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:match_end] - del self._buffer[:match_end] + chunk = self._buffer.take_bytes(match_end) self._maybe_resume_transport() - return bytes(chunk) + return chunk async def read(self, n=-1): """Read up to `n` bytes from the stream. @@ -716,20 +714,16 @@ async def read(self, n=-1): # collect everything in self._buffer, but that would # deadlock if the subprocess sends more than self.limit # bytes. So just call self.read(self._limit) until EOF. - blocks = [] - while True: - block = await self.read(self._limit) - if not block: - break - blocks.append(block) - return b''.join(blocks) + joined = bytearray() + while block := await self.read(self._limit): + joined += block + return joined.take_bytes() if not self._buffer and not self._eof: await self._wait_for_data('read') # This will work right even if buffer is less than n bytes - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(min(len(self._buffer), n)) self._maybe_resume_transport() return data @@ -760,18 +754,12 @@ async def readexactly(self, n): while len(self._buffer) < n: if self._eof: - incomplete = bytes(self._buffer) - self._buffer.clear() + incomplete = self._buffer.take_bytes() raise exceptions.IncompleteReadError(incomplete, n) await self._wait_for_data('readexactly') - if len(self._buffer) == n: - data = bytes(self._buffer) - self._buffer.clear() - else: - data = bytes(memoryview(self._buffer)[:n]) - del self._buffer[:n] + data = self._buffer.take_bytes(n) self._maybe_resume_transport() return data diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index f39e11fdd51..1d463ea09ba 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,6 +1,6 @@ """Tools to analyze tasks running in asyncio programs.""" -from collections import defaultdict, namedtuple +from collections import defaultdict from itertools import count from enum import Enum import sys diff --git a/Lib/base64.py b/Lib/base64.py index 5d78cc09f40..341bf8eaf18 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -193,7 +193,7 @@ def _b32encode(alphabet, s): encoded[-3:] = b'===' elif leftover == 4: encoded[-1:] = b'=' - return bytes(encoded) + return encoded.take_bytes() def _b32decode(alphabet, s, casefold=False, map01=None): # Delay the initialization of the table to not waste memory @@ -238,7 +238,7 @@ def _b32decode(alphabet, s, casefold=False, map01=None): last = acc.to_bytes(5) # big endian leftover = (43 - 5 * padchars) // 8 # 1: 4, 3: 3, 4: 2, 6: 1 decoded[-5:] = last[:leftover] - return bytes(decoded) + return decoded.take_bytes() def b32encode(s): @@ -462,9 +462,12 @@ def b85decode(b): # Delay the initialization of tables to not waste memory # if the function is never called if _b85dec is None: - _b85dec = [None] * 256 + # we don't assign to _b85dec directly to avoid issues when + # multiple threads call this function simultaneously + b85dec_tmp = [None] * 256 for i, c in enumerate(_b85alphabet): - _b85dec[c] = i + b85dec_tmp[c] = i + _b85dec = b85dec_tmp b = _bytes_from_decode_data(b) padding = (-len(b)) % 5 @@ -601,7 +604,14 @@ def main(): with open(args[0], 'rb') as f: func(f, sys.stdout.buffer) else: - func(sys.stdin.buffer, sys.stdout.buffer) + if sys.stdin.isatty(): + # gh-138775: read terminal input data all at once to detect EOF + import io + data = sys.stdin.buffer.read() + buffer = io.BytesIO(data) + else: + buffer = sys.stdin.buffer + func(buffer, sys.stdout.buffer) if __name__ == '__main__': diff --git a/Lib/bdb.py b/Lib/bdb.py index efc3e0a235a..50cf2b3f5b3 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -199,6 +199,8 @@ def __init__(self, skip=None, backend='settrace'): self.frame_returning = None self.trace_opcodes = False self.enterframe = None + self.cmdframe = None + self.cmdlineno = None self.code_linenos = weakref.WeakKeyDictionary() self.backend = backend if backend == 'monitoring': @@ -297,7 +299,12 @@ def dispatch_line(self, frame): self.user_line(). Raise BdbQuit if self.quitting is set. Return self.trace_dispatch to continue tracing in this scope. """ - if self.stop_here(frame) or self.break_here(frame): + # GH-136057 + # For line events, we don't want to stop at the same line where + # the latest next/step command was issued. + if (self.stop_here(frame) or self.break_here(frame)) and not ( + self.cmdframe == frame and self.cmdlineno == frame.f_lineno + ): self.user_line(frame) self.restart_events() if self.quitting: raise BdbQuit @@ -526,7 +533,8 @@ def _set_trace_opcodes(self, trace_opcodes): if self.monitoring_tracer: self.monitoring_tracer.update_local_events() - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False): + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False, + cmdframe=None, cmdlineno=None): """Set the attributes for stopping. If stoplineno is greater than or equal to 0, then stop at line @@ -539,6 +547,10 @@ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False): # stoplineno >= 0 means: stop at line >= the stoplineno # stoplineno -1 means: don't stop at all self.stoplineno = stoplineno + # cmdframe/cmdlineno is the frame/line number when the user issued + # step/next commands. + self.cmdframe = cmdframe + self.cmdlineno = cmdlineno self._set_trace_opcodes(opcode) def _set_caller_tracefunc(self, current_frame): @@ -564,7 +576,9 @@ def set_until(self, frame, lineno=None): def set_step(self): """Stop after one line of code.""" - self._set_stopinfo(None, None) + # set_step() could be called from signal handler so enterframe might be None + self._set_stopinfo(None, None, cmdframe=self.enterframe, + cmdlineno=getattr(self.enterframe, 'f_lineno', None)) def set_stepinstr(self): """Stop before the next instruction.""" @@ -572,7 +586,7 @@ def set_stepinstr(self): def set_next(self, frame): """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) + self._set_stopinfo(frame, None, cmdframe=frame, cmdlineno=frame.f_lineno) def set_return(self, frame): """Stop when returning from the given frame.""" diff --git a/Lib/cProfile.py b/Lib/cProfile.py index 4af82f2cb8c..cc6255f61ae 100644 --- a/Lib/cProfile.py +++ b/Lib/cProfile.py @@ -9,6 +9,5 @@ __all__ = ["run", "runctx", "Profile"] if __name__ == "__main__": - import sys from profiling.tracing.__main__ import main main() diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index 25ac4d1d524..55ffc36ea5b 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -1542,6 +1542,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1610,6 +1612,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index 53c6e757ded..85c1da2c722 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -2,7 +2,6 @@ from concurrent import interpreters import sys -import textwrap from . import thread as _thread import traceback diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index a14650bf5fa..a42afa68efc 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -474,9 +474,23 @@ def _terminate_broken(self, cause): bpe = BrokenProcessPool("A process in the process pool was " "terminated abruptly while the future was " "running or pending.") + cause_str = None if cause is not None: - bpe.__cause__ = _RemoteTraceback( - f"\n'''\n{''.join(cause)}'''") + cause_str = ''.join(cause) + else: + # No cause known, so report any processes that have + # terminated with nonzero exit codes, e.g. from a + # segfault. Multiple may terminate simultaneously, + # so include all of them in the traceback. + errors = [] + for p in self.processes.values(): + if p.exitcode is not None and p.exitcode != 0: + errors.append(f"Process {p.pid} terminated abruptly " + f"with exit code {p.exitcode}") + if errors: + cause_str = "\n".join(errors) + if cause_str: + bpe.__cause__ = _RemoteTraceback(f"\n'''\n{cause_str}'''") # Mark pending tasks as failed. for work_id, work_item in self.pending_work_items.items(): diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index b98f21dcbe9..730ced72998 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -441,9 +441,11 @@ def __init__(self, globals): self.locals = {} self.overwrite_errors = {} self.unconditional_adds = {} + self.method_annotations = {} def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, - overwrite_error=False, unconditional_add=False, decorator=None): + overwrite_error=False, unconditional_add=False, decorator=None, + annotation_fields=None): if locals is not None: self.locals.update(locals) @@ -464,16 +466,14 @@ def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, self.names.append(name) - if return_type is not MISSING: - self.locals[f'__dataclass_{name}_return_type__'] = return_type - return_annotation = f'->__dataclass_{name}_return_type__' - else: - return_annotation = '' + if annotation_fields is not None: + self.method_annotations[name] = (annotation_fields, return_type) + args = ','.join(args) body = '\n'.join(body) # Compute the text of the entire function, add it to the text we're generating. - self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}){return_annotation}:\n{body}') + self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}):\n{body}') def add_fns_to_class(self, cls): # The source to all of the functions we're generating. @@ -509,6 +509,15 @@ def add_fns_to_class(self, cls): # Now that we've generated the functions, assign them into cls. for name, fn in zip(self.names, fns): fn.__qualname__ = f"{cls.__qualname__}.{fn.__name__}" + + try: + annotation_fields, return_type = self.method_annotations[name] + except KeyError: + pass + else: + annotate_fn = _make_annotate_function(cls, name, annotation_fields, return_type) + fn.__annotate__ = annotate_fn + if self.unconditional_adds.get(name, False): setattr(cls, name, fn) else: @@ -524,6 +533,49 @@ def add_fns_to_class(self, cls): raise TypeError(error_msg) +def _make_annotate_function(__class__, method_name, annotation_fields, return_type): + # Create an __annotate__ function for a dataclass + # Try to return annotations in the same format as they would be + # from a regular __init__ function + + def __annotate__(format, /): + Format = annotationlib.Format + match format: + case Format.VALUE | Format.FORWARDREF | Format.STRING: + cls_annotations = {} + for base in reversed(__class__.__mro__): + cls_annotations.update( + annotationlib.get_annotations(base, format=format) + ) + + new_annotations = {} + for k in annotation_fields: + # gh-142214: The annotation may be missing in unusual dynamic cases. + # If so, just skip it. + try: + new_annotations[k] = cls_annotations[k] + except KeyError: + pass + + if return_type is not MISSING: + if format == Format.STRING: + new_annotations["return"] = annotationlib.type_repr(return_type) + else: + new_annotations["return"] = return_type + + return new_annotations + + case _: + raise NotImplementedError(format) + + # This is a flag for _add_slots to know it needs to regenerate this method + # In order to remove references to the original class when it is replaced + __annotate__.__generated_by_dataclasses__ = True + __annotate__.__qualname__ = f"{__class__.__qualname__}.{method_name}.__annotate__" + + return __annotate__ + + def _field_assign(frozen, name, value, self_name): # If we're a frozen class, then assign to our fields in __init__ # via object.__setattr__. Otherwise, just use a simple @@ -612,7 +664,7 @@ def _init_param(f): elif f.default_factory is not MISSING: # There's a factory function. Set a marker. default = '=__dataclass_HAS_DEFAULT_FACTORY__' - return f'{f.name}:__dataclass_type_{f.name}__{default}' + return f'{f.name}{default}' def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, @@ -635,11 +687,10 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, raise TypeError(f'non-default argument {f.name!r} ' f'follows default argument {seen_default.name!r}') - locals = {**{f'__dataclass_type_{f.name}__': f.type for f in fields}, - **{'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, - '__dataclass_builtins_object__': object, - } - } + annotation_fields = [f.name for f in fields if f.init] + + locals = {'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, + '__dataclass_builtins_object__': object} body_lines = [] for f in fields: @@ -670,7 +721,8 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, [self_name] + _init_params, body_lines, locals=locals, - return_type=None) + return_type=None, + annotation_fields=annotation_fields) def _frozen_get_del_attr(cls, fields, func_builder): @@ -1337,6 +1389,26 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): or _update_func_cell_for__class__(member.fdel, cls, newcls)): break + # Get new annotations to remove references to the original class + # in forward references + newcls_ann = annotationlib.get_annotations( + newcls, format=annotationlib.Format.FORWARDREF) + + # Fix references in dataclass Fields + for f in getattr(newcls, _FIELDS).values(): + try: + ann = newcls_ann[f.name] + except KeyError: + pass + else: + f.type = ann + + # Fix the class reference in the __annotate__ method + init = newcls.__init__ + if init_annotate := getattr(init, "__annotate__", None): + if getattr(init_annotate, "__generated_by_dataclasses__", False): + _update_func_cell_for__class__(init_annotate, cls, newcls) + return newcls diff --git a/Lib/doctest.py b/Lib/doctest.py index 92a2ab4f7e6..ad8fb900f69 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -104,7 +104,7 @@ def _test(): import traceback import types import unittest -from io import StringIO, IncrementalNewlineDecoder +from io import StringIO, TextIOWrapper, BytesIO from collections import namedtuple import _colorize # Used in doctests from _colorize import ANSIColors, can_colorize @@ -237,10 +237,6 @@ def _normalize_module(module, depth=2): else: raise TypeError("Expected a module, string, or None") -def _newline_convert(data): - # The IO module provides a handy decoder for universal newline conversion - return IncrementalNewlineDecoder(None, True).decode(data, True) - def _load_testfile(filename, package, module_relative, encoding): if module_relative: package = _normalize_module(package, 3) @@ -252,10 +248,9 @@ def _load_testfile(filename, package, module_relative, encoding): pass if hasattr(loader, 'get_data'): file_contents = loader.get_data(filename) - file_contents = file_contents.decode(encoding) # get_data() opens files as 'rb', so one must do the equivalent # conversion as universal newlines would do. - return _newline_convert(file_contents), filename + return TextIOWrapper(BytesIO(file_contents), encoding=encoding, newline=None).read(), filename with open(filename, encoding=encoding) as f: return f.read(), filename diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 91243378dc0..c7f665b3990 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -796,6 +796,10 @@ def params(self): value = urllib.parse.unquote(value, encoding='latin-1') else: try: + # Explicitly look up the codec for warning generation, see gh-140030 + # Can be removed in 3.17 + import codecs + codecs.lookup(charset) value = value.decode(charset, 'surrogateescape') except (LookupError, UnicodeEncodeError): # XXX: there should really be a custom defect for diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index 84917038874..6a7c5fa06d2 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -146,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). @@ -233,9 +234,11 @@ def __init__(self, field): self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies '.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index 95e79b8938b..e23843df448 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -380,7 +380,7 @@ def _fold(self, name, value, sanitize): h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the - # default value of 78, as recommended by RFC 2822. + # default value of 78, as recommended by RFC 5322 section 2.1.1. maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 9d80a5822af..6479b9bab7a 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -32,7 +32,7 @@ NLCRE_bol = re.compile(r'(\r\n|\r|\n)') NLCRE_eol = re.compile(r'(\r\n|\r|\n)\z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' @@ -294,7 +294,7 @@ def _parsegen(self): return if self._cur.get_content_maintype() == 'message': # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. + # another RFC 5322 message. for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ab5bd0653e4..03524c96559 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -50,7 +50,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *, expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, diff --git a/Lib/email/message.py b/Lib/email/message.py index 4380e0ec50b..641fb2e944d 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -141,7 +141,7 @@ def _decode_uu(encoded): class Message: """Basic message object. - A message object is defined as something that has a bunch of RFC 2822 + A message object is defined as something that has a bunch of RFC 5322 headers and a payload. It may optionally have an envelope header (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a multipart or a message/rfc822), then the payload is a list of Message diff --git a/Lib/email/parser.py b/Lib/email/parser.py index 039f03cba74..c6a51dd8e37 100644 --- a/Lib/email/parser.py +++ b/Lib/email/parser.py @@ -2,7 +2,7 @@ # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -15,13 +15,13 @@ class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header + The string must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -75,13 +75,13 @@ def parsestr(self, text, headersonly=True): class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header + The input must be formatted as a block of RFC 5322 headers and header continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/Lib/email/utils.py b/Lib/email/utils.py index 3de1f0d24a1..d4824dc3601 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -460,6 +460,10 @@ def collapse_rfc2231_value(value, errors='replace', charset = fallback_charset rawbytes = bytes(text, 'raw-unicode-escape') try: + # Explicitly look up the codec for warning generation, see gh-140030 + # Can be removed in 3.17 + import codecs + codecs.lookup(charset) return str(rawbytes, charset, errors) except LookupError: # charset is not a known codec. diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index e7e4ca3358e..e205ec32637 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -26,7 +26,7 @@ (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -"""#" +""" import codecs import sys @@ -56,6 +56,12 @@ def normalize_encoding(encoding): if isinstance(encoding, bytes): encoding = str(encoding, "ascii") + if not encoding.isascii(): + import warnings + warnings.warn( + "Support for non-ascii encoding names will be removed in 3.17", + DeprecationWarning, stacklevel=2) + return _normalize_encoding(encoding) def search_function(encoding): diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index 0c90b4c9fe1..d31ee07ab45 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -226,7 +226,8 @@ def encode(self, input, errors='strict'): offset + exc.end, exc.reason, ) - return bytes(result+trailing_dot), len(input) + result += trailing_dot + return result.take_bytes(), len(input) def decode(self, input, errors='strict'): @@ -311,7 +312,7 @@ def _buffer_encode(self, input, errors, final): result += trailing_dot size += len(trailing_dot) - return (bytes(result), size) + return (result.take_bytes(), size) class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): diff --git a/Lib/encodings/punycode.py b/Lib/encodings/punycode.py index 4622fc8c920..268fccbd539 100644 --- a/Lib/encodings/punycode.py +++ b/Lib/encodings/punycode.py @@ -17,7 +17,7 @@ def segregate(str): else: extended.add(c) extended = sorted(extended) - return bytes(base), extended + return base.take_bytes(), extended def selective_len(str, max): """Return the length of str, considering only characters below max.""" @@ -83,7 +83,7 @@ def generate_generalized_integer(N, bias): t = T(j, bias) if N < t: result.append(digits[N]) - return bytes(result) + return result.take_bytes() result.append(digits[t + ((N - t) % (36 - t))]) N = (N - t) // (36 - t) j += 1 @@ -112,7 +112,7 @@ def generate_integers(baselen, deltas): s = generate_generalized_integer(delta, bias) result.extend(s) bias = adapt(delta, points==0, baselen+points+1) - return bytes(result) + return result.take_bytes() def punycode_encode(text): base, extended = segregate(text) diff --git a/Lib/functools.py b/Lib/functools.py index a92844ba722..8063eb5ffc3 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -1083,7 +1083,10 @@ def __call__(self, /, *args, **kwargs): 'singledispatchmethod method') raise TypeError(f'{funcname} requires at least ' '1 positional argument') - return self._dispatch(args[0].__class__).__get__(self._obj, self._cls)(*args, **kwargs) + method = self._dispatch(args[0].__class__) + if hasattr(method, "__get__"): + method = method.__get__(self._obj, self._cls) + return method(*args, **kwargs) def __getattr__(self, name): # Resolve these attributes lazily to speed up creation of diff --git a/Lib/html/parser.py b/Lib/html/parser.py index e50620de800..80fb8c3f929 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -24,6 +24,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +incomplete_charref = re.compile('&#(?:[0-9]|[xX][0-9a-fA-F])') attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') @@ -304,10 +305,20 @@ def goahead(self, end): k = k - 1 i = self.updatepos(i, k) continue + match = incomplete_charref.match(rawdata, i) + if match: + if end: + self.handle_charref(rawdata[i+2:]) + i = self.updatepos(i, n) + break + # incomplete + break + elif i + 3 < n: # larger than "&#x" + # not the end of the buffer, and can't be confused + # with some other construct + self.handle_data("&#") + i = self.updatepos(i, i + 2) else: - if ";" in rawdata[i:]: # bail by consuming &# - self.handle_data(rawdata[i:i+2]) - i = self.updatepos(i, i+2) break elif startswith('&', i): match = entityref.match(rawdata, i) @@ -321,15 +332,13 @@ def goahead(self, end): continue match = incomplete.match(rawdata, i) if match: - # match.group() will contain at least 2 chars - if end and match.group() == rawdata[i:]: - k = match.end() - if k <= i: - k = n - i = self.updatepos(i, i + 1) + if end: + self.handle_entityref(rawdata[i+1:]) + i = self.updatepos(i, n) + break # incomplete break - elif (i + 1) < n: + elif i + 1 < n: # not the end of the buffer, and can't be confused # with some other construct self.handle_data("&") diff --git a/Lib/http/client.py b/Lib/http/client.py index 425d9bdad8c..73c3256734a 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -111,6 +111,11 @@ _MAXLINE = 65536 _MAXHEADERS = 100 +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = 1 << 20 + + # Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) # # VCHAR = %x21-7E @@ -231,7 +236,7 @@ def _read_headers(fp, max_headers): def _parse_header_lines(header_lines, _class=HTTPMessage): """ - Parses only RFC2822 headers from header lines. + Parses only RFC 5322 headers from header lines. email Parser wants to see strings rather than bytes. But a TextIOWrapper around self.rfile would buffer too many bytes @@ -244,7 +249,7 @@ def _parse_header_lines(header_lines, _class=HTTPMessage): return email.parser.Parser(_class=_class).parsestr(hstring) def parse_headers(fp, _class=HTTPMessage, *, _max_headers=None): - """Parses only RFC2822 headers from a file pointer.""" + """Parses only RFC 5322 headers from a file pointer.""" headers = _read_headers(fp, _max_headers) return _parse_header_lines(headers, _class) @@ -642,10 +647,25 @@ def _safe_read(self, amt): reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) - return data + cursize = min(amt, _MIN_READ_BUF_SIZE) + data = self.fp.read(cursize) + if len(data) >= amt: + return data + if len(data) < cursize: + raise IncompleteRead(data, amt - len(data)) + + data = io.BytesIO(data) + data.seek(0, 2) + while True: + # This is a geometric increase in read size (never more than + # doubling out the current length of data per loop iteration). + delta = min(cursize, amt - cursize) + data.write(self.fp.read(delta)) + if data.tell() >= amt: + return data.getvalue() + cursize += delta + if data.tell() < cursize: + raise IncompleteRead(data.getvalue(), amt - data.tell()) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" diff --git a/Lib/idlelib/News3.txt b/Lib/idlelib/News3.txt index 30784578cc6..53d83762f99 100644 --- a/Lib/idlelib/News3.txt +++ b/Lib/idlelib/News3.txt @@ -4,6 +4,9 @@ Released on 2025-10-07 ========================= +gh-129873: Simplify displaying the IDLE doc by only copying the text +section of idle.html to idlelib/help.html. Patch by Stan Ulbrych. + gh-112936: IDLE - Include Shell menu in single-process mode, though with Restart Shell and View Last Restart disabled. Patch by Zhikang Yan. @@ -26,9 +29,6 @@ Released on 2024-10-07 gh-120104: Fix padding in config and search dialog windows in IDLE. -gh-129873: Simplify displaying the IDLE doc by only copying the text -section of idle.html to idlelib/help.html. Patch by Stan Ulbrych. - gh-120083: Add explicit black IDLE Hovertip foreground color needed for recent macOS. Fixes Sonoma showing unreadable white on pale yellow. Patch by John Riggles. diff --git a/Lib/idlelib/help.html b/Lib/idlelib/help.html index ebff9a309d9..fc618ab727d 100644 --- a/Lib/idlelib/help.html +++ b/Lib/idlelib/help.html @@ -53,7 +53,7 @@ and after the window title. If there is no associated file, do Save As instead.

Save As…

Save the current window with a Save As dialog. The file saved becomes the -new associated file for the window. (If your file namager is set to hide +new associated file for the window. (If your file manager is set to hide extensions, the current extension will be omitted in the file name box. If the new filename has no ‘.’, ‘.py’ and ‘.txt’ will be added for Python and text files, except that on macOS Aqua,’.py’ is added for all files.)

@@ -143,8 +143,8 @@ paragraph will be formatted to less than N columns, where N defaults to 72.

New Indent Width

Open a dialog to change indent width. The accepted default by the Python community is 4 spaces.

-
Strip Trailing Chitespace

Remove trailing space and other whitespace characters after the last -non-whitespace character of a line by applying str.rstrip to each line, +

Strip Trailing Whitespace

Remove trailing space and other whitespace characters after the last +non-whitespace character of a line by applying str.rstrip() to each line, including lines within multiline strings. Except for Shell windows, remove extra newlines at the end of the file.

@@ -337,16 +337,16 @@ Unix and the Command key on assume that the keys have not been re-bound to something else.)

  • Arrow keys move the cursor one character or line.

  • -
  • C-LeftArrow and C-RightArrow moves left or right one word.

  • +
  • C-LeftArrow and C-RightArrow moves left or right one word.

  • Home and End go to the beginning or end of the line.

  • Page Up and Page Down go up or down one screen.

  • -
  • C-Home and C-End go to beginning or end of the file.

  • -
  • Backspace and Del (or C-d) delete the previous +

  • C-Home and C-End go to beginning or end of the file.

  • +
  • Backspace and Del (or C-d) delete the previous or next character.

  • -
  • C-Backspace and C-Del delete one word left or right.

  • -
  • C-k deletes (‘kills’) everything to the right.

  • +
  • C-Backspace and C-Del delete one word left or right.

  • +
  • C-k deletes (‘kills’) everything to the right.

-

Standard keybindings (like C-c to copy and C-v to paste) +

Standard keybindings (like C-c to copy and C-v to paste) may work. Keybindings are selected in the Configure IDLE dialog.

@@ -390,7 +390,7 @@ one can specify a drive first.) Move into subdirectories by typing a directory name and a separator.

Instead of waiting, or after a box is closed, open a completion box immediately with Show Completions on the Edit menu. The default hot -key is C-space. If one types a prefix for the desired name +key is C-space. If one types a prefix for the desired name before opening the box, the first match or near miss is made visible. The result is the same as if one enters a prefix after the box is displayed. Show Completions after a quote completes @@ -473,9 +473,9 @@ in an editor window.

The editing features described in previous subsections work when entering code interactively. IDLE’s Shell window also responds to the following:

    -
  • C-c attempts to interrupt statement execution (but may fail).

  • -
  • C-d closes Shell if typed at a >>> prompt.

  • -
  • Alt-p and Alt-n (C-p and C-n on macOS) +

  • C-c attempts to interrupt statement execution (but may fail).

  • +
  • C-d closes Shell if typed at a >>> prompt.

  • +
  • Alt-p and Alt-n (C-p and C-n on macOS) retrieve to the current prompt the previous or next previously entered statement that matches anything already typed.

  • Return while the cursor is on any previous statement @@ -517,27 +517,73 @@ executed in the Tk namespace, so this file is not useful for importing functions to be used from IDLE’s Python shell.

    Command line usage

    -
    idle.py [-c command] [-d] [-e] [-h] [-i] [-r file] [-s] [-t title] [-] [arg] ...
    -
    --c command  run command in the shell window
    --d          enable debugger and open shell window
    --e          open editor window
    --h          print help message with legal combinations and exit
    --i          open shell window
    --r file     run file in shell window
    --s          run $IDLESTARTUP or $PYTHONSTARTUP first, in shell window
    --t title    set title of shell window
    --           run stdin in shell (- must be last option before args)
    +

    IDLE can be invoked from the command line with various options. The general syntax is:

    +
    python -m idlelib [options] [file ...]
     
    -

    If there are arguments:

    +

    The following options are available:

    +
    +
    +-c <command>
    +

    Run the specified Python command in the shell window. +For example, pass -c "print('Hello, World!')". +On Windows, the outer quotes must be double quotes as shown.

    +
    + +
    +
    +-d
    +

    Enable the debugger and open the shell window.

    +
    + +
    +
    +-e
    +

    Open an editor window.

    +
    + +
    +
    +-h
    +

    Print a help message with legal combinations of options and exit.

    +
    + +
    +
    +-i
    +

    Open a shell window.

    +
    + +
    +
    +-r <file>
    +

    Run the specified file in the shell window.

    +
    + +
    +
    +-s
    +

    Run the startup file (as defined by the environment variables IDLESTARTUP or PYTHONSTARTUP) before opening the shell window.

    +
    + +
    +
    +-t <title>
    +

    Set the title of the shell window.

    +
    + +
    +
    +-
    +

    Read and execute standard input in the shell window. This option must be the last one before any arguments.

    +
    + +

    If arguments are provided:

      -
    • If -, -c, or r is used, all arguments are placed in -sys.argv[1:...] and sys.argv[0] is set to '', '-c', -or '-r'. No editor window is opened, even if that is the default -set in the Options dialog.

    • -
    • Otherwise, arguments are files opened for editing and -sys.argv reflects the arguments passed to IDLE itself.

    • +
    • If -, -c, or -r is used, all arguments are placed in sys.argv[1:], +and sys.argv[0] is set to '', '-c', or '-r' respectively. +No editor window is opened, even if that is the default set in the Options dialog.

    • +
    • Otherwise, arguments are treated as files to be opened for editing, and sys.argv reflects the arguments passed to IDLE itself.

    @@ -798,7 +844,7 @@ of this page for how to use IDLE.

    either in idlelib or click Help => About IDLE on the IDLE menu. This file also maps IDLE menu items to the code that implements the item. Except for files listed under ‘Startup’, the idlelib code is ‘private’ in -sense that feature changes can be backported (see PEP 434).

    +sense that feature changes can be backported (see PEP 434).

diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 035ae0fcae1..2f9307cba4f 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -208,12 +208,8 @@ def _write_atomic(path, data, mode=0o666): try: # We first write data to a temporary file, and then use os.replace() to # perform an atomic rename. - with _io.FileIO(fd, 'wb') as file: - bytes_written = file.write(data) - if bytes_written != len(data): - # Raise an OSError so the 'except' below cleans up the partially - # written file. - raise OSError("os.write() didn't write the full pyc file") + with _io.open(fd, 'wb') as file: + file.write(data) _os.replace(path_tmp, path) except OSError: try: @@ -552,8 +548,7 @@ def decode_source(source_bytes): import tokenize # To avoid bootstrap issues. source_bytes_readline = _io.BytesIO(source_bytes).readline encoding = tokenize.detect_encoding(source_bytes_readline) - newline_decoder = _io.IncrementalNewlineDecoder(None, True) - return newline_decoder.decode(source_bytes.decode(encoding[0])) + return _io.TextIOWrapper(_io.BytesIO(source_bytes), encoding=encoding[0], newline=None).read() # Module specifications ####################################################### @@ -819,13 +814,14 @@ def get_source(self, fullname): name=fullname) from exc return decode_source(source_bytes) - def source_to_code(self, data, path, *, _optimize=-1): + def source_to_code(self, data, path, fullname=None, *, _optimize=-1): """Return the code object compiled from source. The 'data' argument can be any object type that compile() supports. """ return _bootstrap._call_with_frames_removed(compile, data, path, 'exec', - dont_inherit=True, optimize=_optimize) + dont_inherit=True, optimize=_optimize, + module=fullname) def get_code(self, fullname): """Concrete implementation of InspectLoader.get_code. @@ -894,7 +890,7 @@ def get_code(self, fullname): source_path=source_path) if source_bytes is None: source_bytes = self.get_data(source_path) - code_object = self.source_to_code(source_bytes, source_path) + code_object = self.source_to_code(source_bytes, source_path, fullname) _bootstrap._verbose_message('code object from {}', source_path) if (not sys.dont_write_bytecode and bytecode_path is not None and source_mtime is not None): @@ -1186,7 +1182,7 @@ def get_source(self, fullname): return '' def get_code(self, fullname): - return compile('', '', 'exec', dont_inherit=True) + return compile('', '', 'exec', dont_inherit=True, module=fullname) def create_module(self, spec): """Use default semantics for module creation.""" diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py index 1e47495f65f..5c13432b5bd 100644 --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -108,7 +108,7 @@ def get_code(self, fullname): source = self.get_source(fullname) if source is None: return None - return self.source_to_code(source) + return self.source_to_code(source, '', fullname) @abc.abstractmethod def get_source(self, fullname): @@ -120,12 +120,12 @@ def get_source(self, fullname): raise ImportError @staticmethod - def source_to_code(data, path=''): + def source_to_code(data, path='', fullname=None): """Compile 'data' into a code object. The 'data' argument can be anything that compile() can handle. The'path' argument should be where the data was retrieved (when applicable).""" - return compile(data, path, 'exec', dont_inherit=True) + return compile(data, path, 'exec', dont_inherit=True, module=fullname) exec_module = _bootstrap_external._LoaderBasics.exec_module load_module = _bootstrap_external._LoaderBasics.load_module @@ -163,9 +163,8 @@ def get_code(self, fullname): try: path = self.get_filename(fullname) except ImportError: - return self.source_to_code(source) - else: - return self.source_to_code(source, path) + path = '' + return self.source_to_code(source, path, fullname) _register( ExecutionLoader, diff --git a/Lib/inspect.py b/Lib/inspect.py index bb22bab3040..8e7511b3af0 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -706,8 +706,8 @@ def _findclass(func): return None return cls -def _finddoc(obj): - if isclass(obj): +def _finddoc(obj, *, search_in_class=True): + if search_in_class and isclass(obj): for base in obj.__mro__: if base is not object: try: @@ -747,6 +747,12 @@ def _finddoc(obj): cls = _findclass(obj.fget) if cls is None or getattr(cls, name) is not obj: return None + # Should be tested before ismethoddescriptor() + elif isinstance(obj, functools.cached_property): + name = obj.attrname + cls = _findclass(obj.func) + if cls is None or getattr(cls, name) is not obj: + return None elif ismethoddescriptor(obj) or isdatadescriptor(obj): name = obj.__name__ cls = obj.__objclass__ @@ -767,19 +773,37 @@ def _finddoc(obj): return doc return None -def getdoc(object): +def _getowndoc(obj): + """Get the documentation string for an object if it is not + inherited from its class.""" + try: + doc = object.__getattribute__(obj, '__doc__') + if doc is None: + return None + if obj is not type: + typedoc = type(obj).__doc__ + if isinstance(typedoc, str) and typedoc == doc: + return None + return doc + except AttributeError: + return None + +def getdoc(object, *, fallback_to_class_doc=True, inherit_class_doc=True): """Get the documentation string for an object. All tabs are expanded to spaces. To clean up docstrings that are indented to line up with blocks of code, any whitespace than can be uniformly removed from the second line onwards is removed.""" - try: - doc = object.__doc__ - except AttributeError: - return None + if fallback_to_class_doc: + try: + doc = object.__doc__ + except AttributeError: + return None + else: + doc = _getowndoc(object) if doc is None: try: - doc = _finddoc(object) + doc = _finddoc(object, search_in_class=inherit_class_doc) except (AttributeError, TypeError): return None if not isinstance(doc, str): diff --git a/Lib/ipaddress.py b/Lib/ipaddress.py index aa0cf4a0620..f1062a8cd05 100644 --- a/Lib/ipaddress.py +++ b/Lib/ipaddress.py @@ -1546,7 +1546,7 @@ def __init__(self, address, strict=True): if self._prefixlen == (self.max_prefixlen - 1): self.hosts = self.__iter__ elif self._prefixlen == (self.max_prefixlen): - self.hosts = lambda: [IPv4Address(addr)] + self.hosts = lambda: iter((IPv4Address(addr),)) @property @functools.lru_cache() @@ -2337,7 +2337,7 @@ def __init__(self, address, strict=True): if self._prefixlen == (self.max_prefixlen - 1): self.hosts = self.__iter__ elif self._prefixlen == self.max_prefixlen: - self.hosts = lambda: [IPv6Address(addr)] + self.hosts = lambda: iter((IPv6Address(addr),)) def hosts(self): """Generate Iterator over usable hosts in a network. diff --git a/Lib/json/__init__.py b/Lib/json/__init__.py index c8fdd0d99a0..89396b25a2c 100644 --- a/Lib/json/__init__.py +++ b/Lib/json/__init__.py @@ -127,8 +127,9 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, instead of raising a ``TypeError``. If ``ensure_ascii`` is false, then the strings written to ``fp`` can - contain non-ASCII characters if they appear in strings contained in - ``obj``. Otherwise, all such characters are escaped in JSON strings. + contain non-ASCII and non-printable characters if they appear in strings + contained in ``obj``. Otherwise, all such characters are escaped in JSON + strings. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -144,10 +145,11 @@ def dump(obj, fp, *, skipkeys=False, ensure_ascii=True, check_circular=True, level of 0 will only insert newlines. ``None`` is the most compact representation. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + If specified, ``separators`` should be an ``(item_separator, + key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is + ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON + representation, you should specify ``(',', ':')`` to eliminate + whitespace. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -188,9 +190,10 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, (``str``, ``int``, ``float``, ``bool``, ``None``) will be skipped instead of raising a ``TypeError``. - If ``ensure_ascii`` is false, then the return value can contain non-ASCII - characters if they appear in strings contained in ``obj``. Otherwise, all - such characters are escaped in JSON strings. + If ``ensure_ascii`` is false, then the return value can contain + non-ASCII and non-printable characters if they appear in strings + contained in ``obj``. Otherwise, all such characters are escaped in + JSON strings. If ``check_circular`` is false, then the circular reference check for container types will be skipped and a circular reference will @@ -206,10 +209,11 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, level of 0 will only insert newlines. ``None`` is the most compact representation. - If specified, ``separators`` should be an ``(item_separator, key_separator)`` - tuple. The default is ``(', ', ': ')`` if *indent* is ``None`` and - ``(',', ': ')`` otherwise. To get the most compact JSON representation, - you should specify ``(',', ':')`` to eliminate whitespace. + If specified, ``separators`` should be an ``(item_separator, + key_separator)`` tuple. The default is ``(', ', ': ')`` if *indent* is + ``None`` and ``(',', ': ')`` otherwise. To get the most compact JSON + representation, you should specify ``(',', ':')`` to eliminate + whitespace. ``default(obj)`` is a function that should return a serializable version of obj or raise TypeError. The default simply raises TypeError. @@ -280,11 +284,12 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + ``object_pairs_hook`` is an optional function that will be called with + the result of any object literal decoded with an ordered list of pairs. + The return value of ``object_pairs_hook`` will be used instead of the + ``dict``. This feature can be used to implement custom decoders. If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes + priority. To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` kwarg; otherwise ``JSONDecoder`` is used. @@ -305,11 +310,12 @@ def loads(s, *, cls=None, object_hook=None, parse_float=None, ``object_hook`` will be used instead of the ``dict``. This feature can be used to implement custom decoders (e.g. JSON-RPC class hinting). - ``object_pairs_hook`` is an optional function that will be called with the - result of any object literal decoded with an ordered list of pairs. The - return value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. If ``object_hook`` - is also defined, the ``object_pairs_hook`` takes priority. + ``object_pairs_hook`` is an optional function that will be called with + the result of any object literal decoded with an ordered list of pairs. + The return value of ``object_pairs_hook`` will be used instead of the + ``dict``. This feature can be used to implement custom decoders. If + ``object_hook`` is also defined, the ``object_pairs_hook`` takes + priority. ``parse_float``, if specified, will be called with the string of every JSON float to be decoded. By default this is equivalent to diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py index ff4bfcdcc40..92ad6352557 100644 --- a/Lib/json/decoder.py +++ b/Lib/json/decoder.py @@ -297,10 +297,10 @@ def __init__(self, *, object_hook=None, parse_float=None, place of the given ``dict``. This can be used to provide custom deserializations (e.g. to support JSON-RPC class hinting). - ``object_pairs_hook``, if specified will be called with the result of - every JSON object decoded with an ordered list of pairs. The return - value of ``object_pairs_hook`` will be used instead of the ``dict``. - This feature can be used to implement custom decoders. + ``object_pairs_hook``, if specified will be called with the result + of every JSON object decoded with an ordered list of pairs. The + return value of ``object_pairs_hook`` will be used instead of the + ``dict``. This feature can be used to implement custom decoders. If ``object_hook`` is also defined, the ``object_pairs_hook`` takes priority. diff --git a/Lib/json/encoder.py b/Lib/json/encoder.py index bc446e0f377..4c70e8b75ed 100644 --- a/Lib/json/encoder.py +++ b/Lib/json/encoder.py @@ -111,9 +111,10 @@ def __init__(self, *, skipkeys=False, ensure_ascii=True, encoding of keys that are not str, int, float, bool or None. If skipkeys is True, such items are simply skipped. - If ensure_ascii is true, the output is guaranteed to be str - objects with all incoming non-ASCII characters escaped. If - ensure_ascii is false, the output can contain non-ASCII characters. + If ensure_ascii is true, the output is guaranteed to be str objects + with all incoming non-ASCII and non-printable characters escaped. + If ensure_ascii is false, the output can contain non-ASCII and + non-printable characters. If check_circular is true, then lists, dicts, and custom encoded objects will be checked for circular references during encoding to @@ -134,14 +135,15 @@ def __init__(self, *, skipkeys=False, ensure_ascii=True, indent level. An indent level of 0 will only insert newlines. None is the most compact representation. - If specified, separators should be an (item_separator, key_separator) - tuple. The default is (', ', ': ') if *indent* is ``None`` and - (',', ': ') otherwise. To get the most compact JSON representation, - you should specify (',', ':') to eliminate whitespace. + If specified, separators should be an (item_separator, + key_separator) tuple. The default is (', ', ': ') if *indent* is + ``None`` and (',', ': ') otherwise. To get the most compact JSON + representation, you should specify (',', ':') to eliminate + whitespace. If specified, default is a function that gets called for objects - that can't otherwise be serialized. It should return a JSON encodable - version of the object or raise a ``TypeError``. + that can't otherwise be serialized. It should return a JSON + encodable version of the object or raise a ``TypeError``. """ @@ -262,17 +264,6 @@ def floatstr(o, allow_nan=self.allow_nan, def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, - ## HACK: hand-optimized bytecode; turn globals into locals - ValueError=ValueError, - dict=dict, - float=float, - id=id, - int=int, - isinstance=isinstance, - list=list, - str=str, - tuple=tuple, - _intstr=int.__repr__, ): def _iterencode_list(lst, _current_indent_level): @@ -309,7 +300,7 @@ def _iterencode_list(lst, _current_indent_level): # Subclasses of int/float may override __repr__, but we still # want to encode them as integers/floats in JSON. One example # within the standard library is IntEnum. - yield buf + _intstr(value) + yield buf + int.__repr__(value) elif isinstance(value, float): # see comment above for int yield buf + _floatstr(value) @@ -372,7 +363,7 @@ def _iterencode_dict(dct, _current_indent_level): key = 'null' elif isinstance(key, int): # see comment for int/float in _make_iterencode - key = _intstr(key) + key = int.__repr__(key) elif _skipkeys: continue else: @@ -397,7 +388,7 @@ def _iterencode_dict(dct, _current_indent_level): yield 'false' elif isinstance(value, int): # see comment for int/float in _make_iterencode - yield _intstr(value) + yield int.__repr__(value) elif isinstance(value, float): # see comment for int/float in _make_iterencode yield _floatstr(value) @@ -432,7 +423,7 @@ def _iterencode(o, _current_indent_level): yield 'false' elif isinstance(o, int): # see comment for int/float in _make_iterencode - yield _intstr(o) + yield int.__repr__(o) elif isinstance(o, float): # see comment for int/float in _make_iterencode yield _floatstr(o) @@ -456,4 +447,13 @@ def _iterencode(o, _current_indent_level): raise if markers is not None: del markers[markerid] - return _iterencode + + def _iterencode_once(o, _current_indent_level): + nonlocal _iterencode, _iterencode_dict, _iterencode_list + try: + yield from _iterencode(o, _current_indent_level) + finally: + # Break reference cycles due to mutually recursive closures: + del _iterencode, _iterencode_dict, _iterencode_list + + return _iterencode_once diff --git a/Lib/linecache.py b/Lib/linecache.py index ef3b2d9136b..b5bf9dbdd3c 100644 --- a/Lib/linecache.py +++ b/Lib/linecache.py @@ -224,21 +224,58 @@ def lazycache(filename, module_globals): def _make_lazycache_entry(filename, module_globals): if not filename or (filename.startswith('<') and filename.endswith('>')): return None - # Try for a __loader__, if available - if module_globals and '__name__' in module_globals: - spec = module_globals.get('__spec__') - name = getattr(spec, 'name', None) or module_globals['__name__'] - loader = getattr(spec, 'loader', None) - if loader is None: - loader = module_globals.get('__loader__') - get_source = getattr(loader, 'get_source', None) - if name and get_source: - def get_lines(name=name, *args, **kwargs): - return get_source(name, *args, **kwargs) - return (get_lines,) - return None + if module_globals is not None and not isinstance(module_globals, dict): + raise TypeError(f'module_globals must be a dict, not {type(module_globals).__qualname__}') + if not module_globals or '__name__' not in module_globals: + return None + spec = module_globals.get('__spec__') + name = getattr(spec, 'name', None) or module_globals['__name__'] + if name is None: + return None + + loader = _bless_my_loader(module_globals) + if loader is None: + return None + + get_source = getattr(loader, 'get_source', None) + if get_source is None: + return None + + def get_lines(name=name, *args, **kwargs): + return get_source(name, *args, **kwargs) + return (get_lines,) + +def _bless_my_loader(module_globals): + # Similar to _bless_my_loader() in importlib._bootstrap_external, + # but always emits warnings instead of errors. + loader = module_globals.get('__loader__') + if loader is None and '__spec__' not in module_globals: + return None + spec = module_globals.get('__spec__') + + # The __main__ module has __spec__ = None. + if spec is None and module_globals.get('__name__') == '__main__': + return loader + + spec_loader = getattr(spec, 'loader', None) + if spec_loader is None: + import warnings + warnings.warn( + 'Module globals is missing a __spec__.loader', + DeprecationWarning) + return loader + + assert spec_loader is not None + if loader is not None and loader != spec_loader: + import warnings + warnings.warn( + 'Module globals; __loader__ != __spec__.loader', + DeprecationWarning) + return loader + + return spec_loader def _register_code(code, string, name): diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py index 48a9f430d45..07ac079186f 100644 --- a/Lib/mimetypes.py +++ b/Lib/mimetypes.py @@ -486,26 +486,28 @@ def _default_mime_types(): '.wiz' : 'application/msword', '.nq' : 'application/n-quads', '.nt' : 'application/n-triples', + '.cjs' : 'application/node', '.bin' : 'application/octet-stream', '.a' : 'application/octet-stream', - '.dll' : 'application/octet-stream', - '.exe' : 'application/octet-stream', '.o' : 'application/octet-stream', '.obj' : 'application/octet-stream', '.so' : 'application/octet-stream', '.oda' : 'application/oda', '.ogx' : 'application/ogg', '.pdf' : 'application/pdf', + '.ai' : 'application/pdf', '.p7c' : 'application/pkcs7-mime', '.ps' : 'application/postscript', - '.ai' : 'application/postscript', '.eps' : 'application/postscript', + '.rtf' : 'application/rtf', '.texi' : 'application/texinfo', '.texinfo': 'application/texinfo', '.toml' : 'application/toml', '.trig' : 'application/trig', '.m3u' : 'application/vnd.apple.mpegurl', '.m3u8' : 'application/vnd.apple.mpegurl', + '.dll' : 'application/vnd.microsoft.portable-executable', + '.exe' : 'application/vnd.microsoft.portable-executable', '.xls' : 'application/vnd.ms-excel', '.xlb' : 'application/vnd.ms-excel', '.eot' : 'application/vnd.ms-fontobject', @@ -648,7 +650,6 @@ def _default_mime_types(): '.pl' : 'text/plain', '.srt' : 'text/plain', '.rtx' : 'text/richtext', - '.rtf' : 'text/rtf', '.tsv' : 'text/tab-separated-values', '.vtt' : 'text/vtt', '.py' : 'text/x-python', @@ -681,11 +682,9 @@ def _default_mime_types(): # Please sort these too common_types = _common_types_default = { - '.rtf' : 'application/rtf', '.apk' : 'application/vnd.android.package-archive', '.midi': 'audio/midi', '.mid' : 'audio/midi', - '.jpg' : 'image/jpg', '.pict': 'image/pict', '.pct' : 'image/pict', '.pic' : 'image/pict', diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index ac478ee7f51..b115d99ab30 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -334,7 +334,7 @@ def load_module(self, fqname, fp, pathname, file_info): self.msgout(2, "load_module ->", m) return m if type == _PY_SOURCE: - co = compile(fp.read(), pathname, 'exec') + co = compile(fp.read(), pathname, 'exec', module=fqname) elif type == _PY_COMPILED: try: data = fp.read() diff --git a/Lib/multiprocessing/heap.py b/Lib/multiprocessing/heap.py index 6217dfe1268..5c835648395 100644 --- a/Lib/multiprocessing/heap.py +++ b/Lib/multiprocessing/heap.py @@ -324,10 +324,6 @@ class BufferWrapper(object): _heap = Heap() def __init__(self, size): - if size < 0: - raise ValueError("Size {0:n} out of range".format(size)) - if sys.maxsize <= size: - raise OverflowError("Size {0:n} too large".format(size)) block = BufferWrapper._heap.malloc(size) self._state = (block, size) util.Finalize(self, BufferWrapper._heap.free, args=(block,)) diff --git a/Lib/multiprocessing/queues.py b/Lib/multiprocessing/queues.py index 925f0439000..981599acf5e 100644 --- a/Lib/multiprocessing/queues.py +++ b/Lib/multiprocessing/queues.py @@ -121,7 +121,7 @@ def get(self, block=True, timeout=None): def qsize(self): # Raises NotImplementedError on Mac OSX because of broken sem_getvalue() - return self._maxsize - self._sem._semlock._get_value() + return self._maxsize - self._sem.get_value() def empty(self): return not self._poll() diff --git a/Lib/multiprocessing/resource_tracker.py b/Lib/multiprocessing/resource_tracker.py index c53092f6e34..3606d1effb4 100644 --- a/Lib/multiprocessing/resource_tracker.py +++ b/Lib/multiprocessing/resource_tracker.py @@ -15,6 +15,7 @@ # this resource tracker process, "killall python" would probably leave unlinked # resources. +import base64 import os import signal import sys @@ -22,6 +23,8 @@ import warnings from collections import deque +import json + from . import spawn from . import util @@ -65,6 +68,13 @@ def __init__(self): self._exitcode = None self._reentrant_messages = deque() + # True to use colon-separated lines, rather than JSON lines, + # for internal communication. (Mainly for testing). + # Filenames not supported by the simple format will always be sent + # using JSON. + # The reader should understand all formats. + self._use_simple_format = False + def _reentrant_call_error(self): # gh-109629: this happens if an explicit call to the ResourceTracker # gets interrupted by a garbage collection, invoking a finalizer (*) @@ -111,7 +121,12 @@ def _stop_locked( close(self._fd) self._fd = None - _, status = waitpid(self._pid, 0) + try: + _, status = waitpid(self._pid, 0) + except ChildProcessError: + self._pid = None + self._exitcode = None + return self._pid = None @@ -191,6 +206,19 @@ def _launch(self): finally: os.close(r) + def _make_probe_message(self): + """Return a probe message.""" + if self._use_simple_format: + return b'PROBE:0:noop\n' + return ( + json.dumps( + {"cmd": "PROBE", "rtype": "noop"}, + ensure_ascii=True, + separators=(",", ":"), + ) + + "\n" + ).encode("ascii") + def _ensure_running_and_write(self, msg=None): with self._lock: if self._lock._recursion_count() > 1: @@ -202,7 +230,7 @@ def _ensure_running_and_write(self, msg=None): if self._fd is not None: # resource tracker was launched before, is it still running? if msg is None: - to_send = b'PROBE:0:noop\n' + to_send = self._make_probe_message() else: to_send = msg try: @@ -229,7 +257,7 @@ def _check_alive(self): try: # We cannot use send here as it calls ensure_running, creating # a cycle. - os.write(self._fd, b'PROBE:0:noop\n') + os.write(self._fd, self._make_probe_message()) except OSError: return False else: @@ -248,11 +276,35 @@ def _write(self, msg): assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}" def _send(self, cmd, name, rtype): - msg = f"{cmd}:{name}:{rtype}\n".encode("ascii") - if len(msg) > 512: - # posix guarantees that writes to a pipe of less than PIPE_BUF - # bytes are atomic, and that PIPE_BUF >= 512 - raise ValueError('msg too long') + if self._use_simple_format and '\n' not in name: + msg = f"{cmd}:{name}:{rtype}\n".encode("ascii") + if len(msg) > 512: + # posix guarantees that writes to a pipe of less than PIPE_BUF + # bytes are atomic, and that PIPE_BUF >= 512 + raise ValueError('msg too long') + self._ensure_running_and_write(msg) + return + + # POSIX guarantees that writes to a pipe of less than PIPE_BUF (512 on Linux) + # bytes are atomic. Therefore, we want the message to be shorter than 512 bytes. + # POSIX shm_open() and sem_open() require the name, including its leading slash, + # to be at most NAME_MAX bytes (255 on Linux) + # With json.dump(..., ensure_ascii=True) every non-ASCII byte becomes a 6-char + # escape like \uDC80. + # As we want the overall message to be kept atomic and therefore smaller than 512, + # we encode encode the raw name bytes with URL-safe Base64 - so a 255 long name + # will not exceed 340 bytes. + b = name.encode('utf-8', 'surrogateescape') + if len(b) > 255: + raise ValueError('shared memory name too long (max 255 bytes)') + b64 = base64.urlsafe_b64encode(b).decode('ascii') + + payload = {"cmd": cmd, "rtype": rtype, "base64_name": b64} + msg = (json.dumps(payload, ensure_ascii=True, separators=(",", ":")) + "\n").encode("ascii") + + # The entire JSON message is guaranteed < PIPE_BUF (512 bytes) by construction. + assert len(msg) <= 512, f"internal error: message too long ({len(msg)} bytes)" + assert msg.startswith(b'{') self._ensure_running_and_write(msg) @@ -263,6 +315,30 @@ def _send(self, cmd, name, rtype): getfd = _resource_tracker.getfd +def _decode_message(line): + if line.startswith(b'{'): + try: + obj = json.loads(line.decode('ascii')) + except Exception as e: + raise ValueError("malformed resource_tracker message: %r" % (line,)) from e + + cmd = obj["cmd"] + rtype = obj["rtype"] + b64 = obj.get("base64_name", "") + + if not isinstance(cmd, str) or not isinstance(rtype, str) or not isinstance(b64, str): + raise ValueError("malformed resource_tracker fields: %r" % (obj,)) + + try: + name = base64.urlsafe_b64decode(b64).decode('utf-8', 'surrogateescape') + except ValueError as e: + raise ValueError("malformed resource_tracker base64_name: %r" % (b64,)) from e + else: + cmd, rest = line.strip().decode('ascii').split(':', maxsplit=1) + name, rtype = rest.rsplit(':', maxsplit=1) + return cmd, rtype, name + + def main(fd): '''Run resource tracker.''' # protect the process from ^C and "killall python" etc @@ -285,7 +361,7 @@ def main(fd): with open(fd, 'rb') as f: for line in f: try: - cmd, name, rtype = line.strip().decode('ascii').split(':') + cmd, rtype, name = _decode_message(line) cleanup_func = _CLEANUP_FUNCS.get(rtype, None) if cleanup_func is None: raise ValueError( diff --git a/Lib/multiprocessing/synchronize.py b/Lib/multiprocessing/synchronize.py index 30425047e98..9188114ae28 100644 --- a/Lib/multiprocessing/synchronize.py +++ b/Lib/multiprocessing/synchronize.py @@ -135,11 +135,16 @@ def __init__(self, value=1, *, ctx): SemLock.__init__(self, SEMAPHORE, value, SEM_VALUE_MAX, ctx=ctx) def get_value(self): + '''Returns current value of Semaphore. + + Raises NotImplementedError on Mac OSX + because of broken sem_getvalue(). + ''' return self._semlock._get_value() def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s)>' % (self.__class__.__name__, value) @@ -155,7 +160,7 @@ def __init__(self, value=1, *, ctx): def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s, maxvalue=%s)>' % \ @@ -247,8 +252,8 @@ def _make_methods(self): def __repr__(self): try: - num_waiters = (self._sleeping_count._semlock._get_value() - - self._woken_count._semlock._get_value()) + num_waiters = (self._sleeping_count.get_value() - + self._woken_count.get_value()) except Exception: num_waiters = 'unknown' return '<%s(%s, %s)>' % (self.__class__.__name__, self._lock, num_waiters) diff --git a/Lib/multiprocessing/util.py b/Lib/multiprocessing/util.py index a1a537dd48d..549fb07c275 100644 --- a/Lib/multiprocessing/util.py +++ b/Lib/multiprocessing/util.py @@ -126,12 +126,14 @@ def is_abstract_socket_namespace(address): # Function returning a temp directory which will be removed on exit # -# Maximum length of a socket file path is usually between 92 and 108 [1], -# but Linux is known to use a size of 108 [2]. BSD-based systems usually -# use a size of 104 or 108 and Windows does not create AF_UNIX sockets. +# Maximum length of a NULL-terminated [1] socket file path is usually +# between 92 and 108 [2], but Linux is known to use a size of 108 [3]. +# BSD-based systems usually use a size of 104 or 108 and Windows does +# not create AF_UNIX sockets. # -# [1]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html -# [2]: https://man7.org/linux/man-pages/man7/unix.7.html. +# [1]: https://github.com/python/cpython/issues/140734 +# [2]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html +# [3]: https://man7.org/linux/man-pages/man7/unix.7.html if sys.platform == 'linux': _SUN_PATH_MAX = 108 @@ -171,11 +173,13 @@ def _get_base_temp_dir(tempfile): # generated by tempfile._RandomNameSequence, which, by design, # is 8 characters long. # - # Thus, the length of socket filename will be: + # Thus, the socket file path length (without NULL terminator) will be: # # len(base_tempdir + '/pymp-XXXXXXXX' + '/sock-XXXXXXXX') sun_path_len = len(base_tempdir) + 14 + 14 - if sun_path_len <= _SUN_PATH_MAX: + # Strict inequality to account for the NULL terminator. + # See https://github.com/python/cpython/issues/140734. + if sun_path_len < _SUN_PATH_MAX: return base_tempdir # Fallback to the default system-wide temporary directory. # This ignores user-defined environment variables. @@ -201,7 +205,7 @@ def _get_base_temp_dir(tempfile): return base_tempdir warn("Ignoring user-defined temporary directory: %s", base_tempdir) # at most max(map(len, dirlist)) + 14 + 14 = 36 characters - assert len(base_system_tempdir) + 14 + 14 <= _SUN_PATH_MAX + assert len(base_system_tempdir) + 14 + 14 < _SUN_PATH_MAX return base_system_tempdir def get_temp_dir(): diff --git a/Lib/pdb.py b/Lib/pdb.py index fdc74198582..1506e3d4709 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -130,7 +130,7 @@ def find_first_executable_line(code): return code.co_firstlineno def find_function(funcname, filename): - cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) + cre = re.compile(r'(?:async\s+)?def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: @@ -346,8 +346,8 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, bdb.Bdb.__init__(self, skip=skip, backend=backend if backend else get_default_backend()) cmd.Cmd.__init__(self, completekey, stdin, stdout) sys.audit("pdb.Pdb") - if stdout: - self.use_rawinput = 0 + if stdin: + self.use_rawinput = False self.prompt = '(Pdb) ' self.aliases = {} self.displaying = {} @@ -398,6 +398,12 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, self._current_task = None + self.lineno = None + self.stack = [] + self.curindex = 0 + self.curframe = None + self._user_requested_quit = False + def set_trace(self, frame=None, *, commands=None): Pdb._last_pdb_instance = self if frame is None: @@ -474,7 +480,7 @@ def forget(self): self.lineno = None self.stack = [] self.curindex = 0 - if hasattr(self, 'curframe') and self.curframe: + if self.curframe: self.curframe.f_globals.pop('__pdb_convenience_variables', None) self.curframe = None self.tb_lineno.clear() @@ -648,7 +654,7 @@ def _show_display(self): def _get_tb_and_exceptions(self, tb_or_exc): """ - Given a tracecack or an exception, return a tuple of chained exceptions + Given a traceback or an exception, return a tuple of chained exceptions and current traceback to inspect. This will deal with selecting the right ``__cause__`` or ``__context__`` @@ -1481,7 +1487,9 @@ def lineinfo(self, identifier): f = self.lookupmodule(parts[0]) if f: fname = f - item = parts[1] + item = parts[1] + else: + return failed answer = find_function(item, self.canonic(fname)) return answer or failed @@ -1493,7 +1501,7 @@ def checkline(self, filename, lineno, module_globals=None): """ # this method should be callable before starting debugging, so default # to "no globals" if there is no current frame - frame = getattr(self, 'curframe', None) + frame = self.curframe if module_globals is None: module_globals = frame.f_globals if frame else None line = linecache.getline(filename, lineno, module_globals) @@ -2423,7 +2431,9 @@ def print_stack_trace(self, count=None): except KeyboardInterrupt: pass - def print_stack_entry(self, frame_lineno, prompt_prefix=line_prefix): + def print_stack_entry(self, frame_lineno, prompt_prefix=None): + if prompt_prefix is None: + prompt_prefix = line_prefix frame, lineno = frame_lineno if frame is self.curframe: prefix = '> ' @@ -3542,7 +3552,15 @@ def exit_with_permission_help_text(): sys.exit(1) -def main(): +def parse_args(): + # We want pdb to be as intuitive as possible to users, so we need to do some + # heuristic parsing to deal with ambiguity. + # For example: + # "python -m pdb -m foo -p 1" should pass "-p 1" to "foo". + # "python -m pdb foo.py -m bar" should pass "-m bar" to "foo.py". + # "python -m pdb -m foo -m bar" should pass "-m bar" to "foo". + # This require some customized parsing logic to find the actual debug target. + import argparse parser = argparse.ArgumentParser( @@ -3553,28 +3571,48 @@ def main(): color=True, ) - # We need to maunally get the script from args, because the first positional - # arguments could be either the script we need to debug, or the argument - # to the -m module + # Get all the commands out first. For backwards compatibility, we allow + # -c commands to be after the target. parser.add_argument('-c', '--command', action='append', default=[], metavar='command', dest='commands', help='pdb commands to execute as if given in a .pdbrc file') - parser.add_argument('-m', metavar='module', dest='module') - parser.add_argument('-p', '--pid', type=int, help="attach to the specified PID", default=None) - - if len(sys.argv) == 1: - # If no arguments were given (python -m pdb), print the whole help message. - # Without this check, argparse would only complain about missing required arguments. - parser.print_help() - sys.exit(2) opts, args = parser.parse_known_args() - if opts.pid: - # If attaching to a remote pid, unrecognized arguments are not allowed. - # This will raise an error if there are extra unrecognized arguments. - opts = parser.parse_args() - if opts.module: - parser.error("argument -m: not allowed with argument --pid") + if not args: + # If no arguments were given (python -m pdb), print the whole help message. + # Without this check, argparse would only complain about missing required arguments. + # We need to add the arguments definitions here to get a proper help message. + parser.add_argument('-m', metavar='module', dest='module') + parser.add_argument('-p', '--pid', type=int, help="attach to the specified PID", default=None) + parser.print_help() + sys.exit(2) + elif args[0] == '-p' or args[0] == '--pid': + # Attach to a pid + parser.add_argument('-p', '--pid', type=int, help="attach to the specified PID", default=None) + opts, args = parser.parse_known_args() + if args: + # For --pid, any extra arguments are invalid. + parser.error(f"unrecognized arguments: {' '.join(args)}") + elif args[0] == '-m': + # Debug a module, we only need the first -m module argument. + # The rest is passed to the module itself. + parser.add_argument('-m', metavar='module', dest='module') + opt_module = parser.parse_args(args[:2]) + opts.module = opt_module.module + args = args[2:] + elif args[0].startswith('-'): + # Invalid argument before the script name. + invalid_args = list(itertools.takewhile(lambda a: a.startswith('-'), args)) + parser.error(f"unrecognized arguments: {' '.join(invalid_args)}") + + # Otherwise it's debugging a script and we already parsed all -c commands. + + return opts, args + +def main(): + opts, args = parse_args() + + if getattr(opts, 'pid', None) is not None: try: attach(opts.pid, opts.commands) except RuntimeError: @@ -3586,30 +3624,10 @@ def main(): except PermissionError: exit_with_permission_help_text() return - elif opts.module: - # If a module is being debugged, we consider the arguments after "-m module" to - # be potential arguments to the module itself. We need to parse the arguments - # before "-m" to check if there is any invalid argument. - # e.g. "python -m pdb -m foo --spam" means passing "--spam" to "foo" - # "python -m pdb --spam -m foo" means passing "--spam" to "pdb" and is invalid - idx = sys.argv.index('-m') - args_to_pdb = sys.argv[1:idx] - # This will raise an error if there are invalid arguments - parser.parse_args(args_to_pdb) - else: - # If a script is being debugged, then pdb expects the script name as the first argument. - # Anything before the script is considered an argument to pdb itself, which would - # be invalid because it's not parsed by argparse. - invalid_args = list(itertools.takewhile(lambda a: a.startswith('-'), args)) - if invalid_args: - parser.error(f"unrecognized arguments: {' '.join(invalid_args)}") - - if opts.module: + elif getattr(opts, 'module', None) is not None: file = opts.module target = _ModuleTarget(file) else: - if not args: - parser.error("no module or script to run") file = args.pop(0) if file.endswith('.pyz'): target = _ZipTarget(file) diff --git a/Lib/pickle.py b/Lib/pickle.py index 729c215514a..f3025776623 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -189,6 +189,11 @@ def __init__(self, value): __all__.extend(x for x in dir() if x.isupper() and not x.startswith('_')) +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = (1 << 20) + + class _Framer: _FRAME_SIZE_MIN = 4 @@ -287,7 +292,7 @@ def read(self, n): "pickle exhausted before end of frame") return data else: - return self.file_read(n) + return self._chunked_file_read(n) def readline(self): if self.current_frame: @@ -302,11 +307,23 @@ def readline(self): else: return self.file_readline() + def _chunked_file_read(self, size): + cursize = min(size, _MIN_READ_BUF_SIZE) + b = self.file_read(cursize) + while cursize < size and len(b) == cursize: + delta = min(cursize, size - cursize) + b += self.file_read(delta) + cursize += delta + return b + def load_frame(self, frame_size): if self.current_frame and self.current_frame.read() != b'': raise UnpicklingError( "beginning of a new frame before end of current frame") - self.current_frame = io.BytesIO(self.file_read(frame_size)) + data = self._chunked_file_read(frame_size) + if len(data) < frame_size: + raise EOFError + self.current_frame = io.BytesIO(data) # Tools used for pickling. @@ -1496,12 +1513,17 @@ def load_binbytes8(self): dispatch[BINBYTES8[0]] = load_binbytes8 def load_bytearray8(self): - len, = unpack(' maxsize: + size, = unpack(' maxsize: raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size " "of %d bytes" % maxsize) - b = bytearray(len) - self.readinto(b) + cursize = min(size, _MIN_READ_BUF_SIZE) + b = bytearray(cursize) + if self.readinto(b) == cursize: + while cursize < size and len(b) == cursize: + delta = min(cursize, size - cursize) + b += self.read(delta) + cursize += delta self.append(b) dispatch[BYTEARRAY8[0]] = load_bytearray8 diff --git a/Lib/pickletools.py b/Lib/pickletools.py index 254b6c7fcc9..29baf3be7eb 100644 --- a/Lib/pickletools.py +++ b/Lib/pickletools.py @@ -2839,7 +2839,7 @@ def __init__(self, value): } -if __name__ == "__main__": +def _main(args=None): import argparse parser = argparse.ArgumentParser( description='disassemble one or more pickle files', @@ -2864,7 +2864,7 @@ def __init__(self, value): '-p', '--preamble', default="==> {name} <==", help='if more than one pickle file is specified, print this before' ' each disassembly') - args = parser.parse_args() + args = parser.parse_args(args) annotate = 30 if args.annotate else 0 memo = {} if args.memo else None if args.output is None: @@ -2885,3 +2885,7 @@ def __init__(self, value): finally: if output is not sys.stdout: output.close() + + +if __name__ == "__main__": + _main() diff --git a/Lib/platform.py b/Lib/platform.py index 4db93bea2a3..b5017dbdb02 100644 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -197,7 +197,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384): | (GLIBC_([0-9.]+)) | (libc(_\w+)?\.so(?:\.(\d[0-9.]*))?) | (musl-([0-9.]+)) - | (libc.musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?) + | ((?:libc\.|ld-)musl(?:-\w+)?.so(?:\.(\d[0-9.]*))?) """, re.ASCII | re.VERBOSE) @@ -236,7 +236,7 @@ def libc_ver(executable=None, lib='', version='', chunksize=16384): elif V(glibcversion) > V(ver): ver = glibcversion elif so: - if lib != 'glibc': + if lib not in ('glibc', 'musl'): lib = 'libc' if soversion and (not ver or V(soversion) > V(ver)): ver = soversion diff --git a/Lib/plistlib.py b/Lib/plistlib.py index 67e832db217..655c51eea3d 100644 --- a/Lib/plistlib.py +++ b/Lib/plistlib.py @@ -73,6 +73,9 @@ PlistFormat = enum.Enum('PlistFormat', 'FMT_XML FMT_BINARY', module=__name__) globals().update(PlistFormat.__members__) +# Data larger than this will be read in chunks, to prevent extreme +# overallocation. +_MIN_READ_BUF_SIZE = 1 << 20 class UID: def __init__(self, data): @@ -508,12 +511,24 @@ def _get_size(self, tokenL): return tokenL + def _read(self, size): + cursize = min(size, _MIN_READ_BUF_SIZE) + data = self._fp.read(cursize) + while True: + if len(data) != cursize: + raise InvalidFileException + if cursize == size: + return data + delta = min(cursize, size - cursize) + data += self._fp.read(delta) + cursize += delta + def _read_ints(self, n, size): - data = self._fp.read(size * n) + data = self._read(size * n) if size in _BINARY_FORMAT: return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data) else: - if not size or len(data) != size * n: + if not size: raise InvalidFileException() return tuple(int.from_bytes(data[i: i + size], 'big') for i in range(0, size * n, size)) @@ -573,22 +588,16 @@ def _read_object(self, ref): elif tokenH == 0x40: # data s = self._get_size(tokenL) - result = self._fp.read(s) - if len(result) != s: - raise InvalidFileException() + result = self._read(s) elif tokenH == 0x50: # ascii string s = self._get_size(tokenL) - data = self._fp.read(s) - if len(data) != s: - raise InvalidFileException() + data = self._read(s) result = data.decode('ascii') elif tokenH == 0x60: # unicode string s = self._get_size(tokenL) * 2 - data = self._fp.read(s) - if len(data) != s: - raise InvalidFileException() + data = self._read(s) result = data.decode('utf-16be') elif tokenH == 0x80: # UID diff --git a/Lib/profiling/sampling/__init__.py b/Lib/profiling/sampling/__init__.py index b493c6aa7eb..6a0bb5e5c2f 100644 --- a/Lib/profiling/sampling/__init__.py +++ b/Lib/profiling/sampling/__init__.py @@ -7,7 +7,8 @@ from .collector import Collector from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector +from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector from .string_table import StringTable -__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "GeckoCollector", "StringTable") +__all__ = ("Collector", "PstatsCollector", "CollapsedStackCollector", "HeatmapCollector", "GeckoCollector", "StringTable") diff --git a/Lib/profiling/sampling/__main__.py b/Lib/profiling/sampling/__main__.py index a76ca62e2cd..47bd3a0113e 100644 --- a/Lib/profiling/sampling/__main__.py +++ b/Lib/profiling/sampling/__main__.py @@ -15,7 +15,7 @@ """ LINUX_PERMISSION_ERROR = """ -🔒 Tachyon was unable to acess process memory. This could be because tachyon +🔒 Tachyon was unable to access process memory. This could be because tachyon has insufficient privileges (the required capability is CAP_SYS_PTRACE). Unprivileged processes cannot trace processes that they cannot send signals to or those running set-user-ID/set-group-ID programs, for security reasons. @@ -45,7 +45,7 @@ system restrictions or missing privileges. """ -from .sample import main +from .cli import main def handle_permission_error(): """Handle PermissionError by displaying appropriate error message.""" diff --git a/Lib/profiling/sampling/_css_utils.py b/Lib/profiling/sampling/_css_utils.py new file mode 100644 index 00000000000..40912e9b352 --- /dev/null +++ b/Lib/profiling/sampling/_css_utils.py @@ -0,0 +1,22 @@ +import importlib.resources + + +def get_combined_css(component: str) -> str: + template_dir = importlib.resources.files(__package__) + + base_css = (template_dir / "_shared_assets" / "base.css").read_text(encoding="utf-8") + + if component == "flamegraph": + component_css = ( + template_dir / "_flamegraph_assets" / "flamegraph.css" + ).read_text(encoding="utf-8") + elif component == "heatmap": + component_css = (template_dir / "_heatmap_assets" / "heatmap.css").read_text( + encoding="utf-8" + ) + else: + raise ValueError( + f"Unknown component: {component}. Expected 'flamegraph' or 'heatmap'." + ) + + return f"{base_css}\n\n{component_css}" diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css new file mode 100644 index 00000000000..c75f2324b6d --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.css @@ -0,0 +1,901 @@ +/* ========================================================================== + Flamegraph Viewer - Component-Specific CSS + + DEPENDENCY: Requires _shared_assets/base.css to be loaded first + This file extends the shared foundation with flamegraph-specific styles. + ========================================================================== */ + +/* -------------------------------------------------------------------------- + Layout Overrides (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +html, body { + height: 100%; + overflow: hidden; +} + +.app-layout { + height: 100vh; +} + +.main-content { + display: flex; + flex: 1; + min-height: 0; +} + +/* -------------------------------------------------------------------------- + Search Input (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +.search-wrapper { + flex: 1; + max-width: 360px; + position: relative; +} + +.search-input { + width: 100%; + padding: 8px 36px 8px 14px; + font-family: var(--font-sans); + font-size: 13px; + color: #2e3338; + background: rgba(255, 255, 255, 0.95); + border: 2px solid rgba(255, 255, 255, 0.3); + border-radius: 20px; + outline: none; + transition: all var(--transition-fast); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); +} + +.search-input::placeholder { + color: #6c757d; +} + +.search-input:focus { + border-color: rgba(255, 255, 255, 0.8); + background: white; + box-shadow: 0 4px 16px rgba(0, 0, 0, 0.15); +} + +/* Dark theme search input */ +[data-theme="dark"] .search-input { + color: #e6edf3; + background: rgba(33, 38, 45, 0.95); + border: 2px solid rgba(88, 166, 255, 0.3); + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3); +} + +[data-theme="dark"] .search-input::placeholder { + color: #8b949e; +} + +[data-theme="dark"] .search-input:focus { + border-color: rgba(88, 166, 255, 0.6); + background: rgba(33, 38, 45, 1); + box-shadow: 0 4px 16px rgba(88, 166, 255, 0.2); +} + +.search-input.has-matches { + border-color: rgba(40, 167, 69, 0.8); + box-shadow: 0 4px 16px rgba(40, 167, 69, 0.2); +} + +.search-input.no-matches { + border-color: rgba(220, 53, 69, 0.8); + box-shadow: 0 4px 16px rgba(220, 53, 69, 0.2); +} + +.search-clear { + position: absolute; + right: 10px; + top: 50%; + transform: translateY(-50%); + width: 20px; + height: 20px; + padding: 0; + display: none; + align-items: center; + justify-content: center; + font-size: 14px; + line-height: 1; + color: #6c757d; + background: transparent; + border: none; + border-radius: 50%; + cursor: pointer; + transition: color var(--transition-fast); +} + +.search-clear:hover { + color: #2e3338; +} + +[data-theme="dark"] .search-clear { + color: #8b949e; +} + +[data-theme="dark"] .search-clear:hover { + color: #e6edf3; +} + +.search-wrapper.has-value .search-clear { + display: flex; +} + +/* -------------------------------------------------------------------------- + Sidebar + -------------------------------------------------------------------------- */ + +.sidebar { + width: var(--sidebar-width); + background: var(--bg-secondary); + border-right: 1px solid var(--border); + display: flex; + flex-direction: column; + flex-shrink: 0; + overflow: hidden; + position: relative; +} + +.sidebar.collapsed { + width: var(--sidebar-collapsed) !important; + transition: width var(--transition-normal); +} + +.sidebar-toggle { + position: absolute; + top: 12px; + right: 10px; + width: 26px; + height: 26px; + display: flex; + align-items: center; + justify-content: center; + color: var(--text-muted); + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 6px; + cursor: pointer; + transition: all var(--transition-fast); + z-index: 10; +} + +.sidebar-toggle svg { + transition: transform var(--transition-fast); +} + +.sidebar-toggle:hover { + color: var(--accent); + border-color: var(--accent); + background: var(--accent-glow); +} + +.sidebar.collapsed .sidebar-toggle { + right: 9px; +} + +.sidebar.collapsed .sidebar-toggle svg { + transform: rotate(180deg); +} + +.sidebar-content { + flex: 1; + overflow-y: auto; + padding: 44px 14px 14px; +} + +.sidebar.collapsed .sidebar-content { + display: none; +} + +.sidebar-resize-handle { + position: absolute; + top: 0; + right: 0; + width: 6px; + height: 100%; + cursor: col-resize; + background: transparent; + transition: background var(--transition-fast); + z-index: 11; +} + +.sidebar-resize-handle:hover, +.sidebar-resize-handle.resizing { + background: var(--python-gold); +} + +.sidebar-resize-handle::before { + content: ''; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + width: 2px; + height: 40px; + background: var(--border); + border-radius: 1px; + opacity: 0; + transition: opacity var(--transition-fast); +} + +.sidebar-resize-handle:hover::before { + opacity: 1; +} + +.sidebar.collapsed .sidebar-resize-handle { + display: none; +} + +body.resizing-sidebar { + cursor: col-resize; + user-select: none; +} + +/* Sidebar Logo */ +.sidebar-logo { + display: flex; + justify-content: center; + margin-bottom: 16px; +} + +.sidebar-logo-img { + width: 90px; + height: 90px; + display: flex; + align-items: center; + justify-content: center; +} + +.sidebar-logo-img svg, +.sidebar-logo-img img { + width: 100%; + height: 100%; + object-fit: contain; +} + +/* Sidebar sections */ +.sidebar-section { + margin-bottom: 20px; +} + +.sidebar-section:last-child { + margin-bottom: 0; +} + +.section-title { + font-size: 10px; + font-weight: 700; + text-transform: uppercase; + letter-spacing: 0.8px; + color: var(--accent); + margin: 0; + flex: 1; +} + +/* Collapsible sections */ +.collapsible .section-header { + display: flex; + align-items: center; + width: 100%; + padding: 0 0 8px 0; + margin-bottom: 10px; + background: none; + border: none; + border-bottom: 2px solid var(--python-gold); + cursor: pointer; + transition: all var(--transition-fast); +} + +.collapsible .section-header:hover { + opacity: 0.8; +} + +.section-chevron { + color: var(--text-muted); + transition: transform var(--transition-fast); +} + +.collapsible.collapsed .section-chevron { + transform: rotate(-90deg); +} + +.section-content { + overflow: hidden; + transition: max-height var(--transition-normal), opacity var(--transition-normal); + max-height: 1000px; + opacity: 1; +} + +.collapsible.collapsed .section-content { + max-height: 0; + opacity: 0; + margin-bottom: -10px; +} + +/* -------------------------------------------------------------------------- + Profile Summary Cards + -------------------------------------------------------------------------- */ + +.summary-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 6px; +} + +.summary-card { + display: flex; + align-items: center; + gap: 8px; + padding: 8px 10px; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + transition: all var(--transition-fast); + animation: slideUp 0.4s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.05s); + overflow: hidden; +} + +.summary-card:nth-child(1) { --i: 0; } +.summary-card:nth-child(2) { --i: 1; } +.summary-card:nth-child(3) { --i: 2; } +.summary-card:nth-child(4) { --i: 3; } + +.summary-card:hover { + border-color: var(--accent); + background: var(--accent-glow); +} + +.summary-icon { + font-size: 16px; + width: 28px; + height: 28px; + display: flex; + align-items: center; + justify-content: center; + background: var(--bg-tertiary); + border-radius: 6px; + flex-shrink: 0; +} + +.summary-data { + min-width: 0; + flex: 1; + overflow: hidden; +} + +.summary-value { + font-family: var(--font-mono); + font-size: 13px; + font-weight: 700; + color: var(--accent); + line-height: 1.2; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.summary-label { + font-size: 8px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.2px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +/* Efficiency Bar */ +.efficiency-section { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid var(--border); +} + +.efficiency-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 5px; +} + +.efficiency-label { + font-size: 9px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.2px; +} + +.efficiency-value { + font-family: var(--font-mono); + font-size: 11px; + font-weight: 700; + color: var(--accent); +} + +.efficiency-bar { + height: 6px; + background: var(--bg-tertiary); + border-radius: 3px; + overflow: hidden; +} + +.efficiency-fill { + height: 100%; + background: linear-gradient(90deg, #28a745 0%, #20c997 50%, #17a2b8 100%); + border-radius: 3px; + transition: width 0.6s ease-out; + position: relative; + overflow: hidden; +} + +.efficiency-fill::after { + content: ''; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient( + 90deg, + transparent 0%, + rgba(255, 255, 255, 0.4) 50%, + transparent 100% + ); + animation: shimmer 2s ease-in-out infinite; +} + +/* -------------------------------------------------------------------------- + Thread Stats Grid (in Sidebar) + -------------------------------------------------------------------------- */ + +.thread-stats-section { + display: block; +} + +.stats-grid { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 8px; +} + +.stat-tile { + background: var(--bg-primary); + border-radius: 8px; + padding: 10px; + text-align: center; + border: 2px solid var(--border); + transition: all var(--transition-fast); + animation: fadeIn 0.4s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.05s); +} + +.stat-tile:nth-child(1) { --i: 0; } +.stat-tile:nth-child(2) { --i: 1; } +.stat-tile:nth-child(3) { --i: 2; } +.stat-tile:nth-child(4) { --i: 3; } + +.stat-tile:hover { + transform: translateY(-2px); + box-shadow: var(--shadow-sm); +} + +.stat-tile-value { + font-family: var(--font-mono); + font-size: 16px; + font-weight: 700; + color: var(--text-primary); + line-height: 1.2; +} + +.stat-tile-label { + font-size: 9px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.3px; + color: var(--text-muted); + margin-top: 2px; +} + +/* Stat tile color variants */ +.stat-tile--green { --tile-color: 40, 167, 69; --tile-text: #28a745; } +.stat-tile--red { --tile-color: 220, 53, 69; --tile-text: #dc3545; } +.stat-tile--yellow { --tile-color: 255, 193, 7; --tile-text: #d39e00; } +.stat-tile--purple { --tile-color: 111, 66, 193; --tile-text: #6f42c1; } + +.stat-tile[class*="--"] { + border-color: rgba(var(--tile-color), 0.4); + background: linear-gradient(135deg, rgba(var(--tile-color), 0.08) 0%, var(--bg-primary) 100%); +} +.stat-tile[class*="--"] .stat-tile-value { color: var(--tile-text); } + +/* -------------------------------------------------------------------------- + Hotspot Cards + -------------------------------------------------------------------------- */ + +.hotspot { + display: flex; + align-items: flex-start; + gap: 10px; + padding: 10px; + margin-bottom: 8px; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + cursor: pointer; + transition: all var(--transition-fast); + opacity: 0; + transform: translateY(8px); + box-shadow: var(--shadow-sm); +} + +.hotspot.visible { + opacity: 1; + transform: translateY(0); +} + +.hotspot:hover { + border-color: var(--accent); + box-shadow: var(--shadow-md); + transform: translateY(-2px); +} + +.hotspot.active { + border-color: var(--python-gold); + background: var(--accent-glow); + box-shadow: 0 0 0 3px var(--accent-glow); +} + +.hotspot:last-child { + margin-bottom: 0; +} + +.hotspot-rank { + width: 26px; + height: 26px; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-weight: 700; + font-size: 12px; + flex-shrink: 0; + background: linear-gradient(135deg, var(--python-blue) 0%, var(--python-blue-light) 100%); + color: white; + box-shadow: 0 2px 4px rgba(55, 118, 171, 0.3); +} + +.hotspot-rank--1 { background: linear-gradient(135deg, #d4af37, #f4d03f); color: #5a4a00; } +.hotspot-rank--2 { background: linear-gradient(135deg, #a8a8a8, #c0c0c0); color: #4a4a4a; } +.hotspot-rank--3 { background: linear-gradient(135deg, #cd7f32, #e6a55a); color: #5a3d00; } + +.hotspot-info { + flex: 1; + min-width: 0; +} + +.hotspot-func { + font-family: var(--font-mono); + font-size: 11px; + font-weight: 600; + color: var(--text-primary); + line-height: 1.3; + word-break: break-word; + margin-bottom: 2px; +} + +.hotspot-file { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-muted); + margin-bottom: 3px; + word-break: break-all; +} + +.hotspot-stats { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-secondary); +} + +.hotspot-percent { + color: var(--accent); + font-weight: 600; +} + +/* -------------------------------------------------------------------------- + Legend + -------------------------------------------------------------------------- */ + +.legend-section { + margin-top: auto; + padding-top: 12px; +} + +.legend { + display: flex; + flex-direction: column; + gap: 4px; +} + +.legend-item { + display: flex; + align-items: center; + gap: 8px; + padding: 5px 8px; + background: var(--bg-primary); + border-radius: 4px; + border: 1px solid var(--border-subtle); + font-size: 11px; +} + +.legend-color { + width: 20px; + height: 10px; + border-radius: 2px; + flex-shrink: 0; + border: 1px solid rgba(0, 0, 0, 0.08); +} + +.legend-label { + color: var(--text-primary); + font-weight: 500; + flex: 1; +} + +.legend-range { + font-family: var(--font-mono); + font-size: 9px; + color: var(--text-muted); +} + +/* -------------------------------------------------------------------------- + Thread Filter + -------------------------------------------------------------------------- */ + +.filter-section { + padding-top: 12px; + border-top: 1px solid var(--border); +} + +.filter-label { + display: block; + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + margin-bottom: 6px; +} + +.filter-select { + width: 100%; + padding: 7px 28px 7px 10px; + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-primary); + background: var(--bg-primary); + border: 2px solid var(--accent); + border-radius: 6px; + cursor: pointer; + outline: none; + appearance: none; + background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%233776ab' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); + background-repeat: no-repeat; + background-position: right 6px center; + background-size: 14px; + transition: all var(--transition-fast); +} + +.filter-select:hover { + border-color: var(--accent-hover); + box-shadow: 0 2px 6px var(--accent-glow); +} + +.filter-select:focus { + border-color: var(--accent); + box-shadow: 0 0 0 3px var(--accent-glow); +} + +/* -------------------------------------------------------------------------- + Chart Area + -------------------------------------------------------------------------- */ + +.chart-area { + flex: 1; + min-width: 0; + overflow: hidden; + background: var(--bg-primary); + position: relative; +} + +#chart { + width: 100%; + height: 100%; + padding: 16px; + overflow: auto; +} + +/* D3 Flamegraph overrides */ +.d3-flame-graph rect { + stroke: rgba(55, 118, 171, 0.3); + stroke-width: 1px; + cursor: pointer; + transition: filter 0.1s ease; +} + +.d3-flame-graph rect:hover { + stroke: var(--python-blue); + stroke-width: 2px; + filter: brightness(1.08); +} + +.d3-flame-graph text { + font-family: var(--font-sans); + font-size: 12px; + font-weight: 500; + fill: var(--text-primary); + pointer-events: none; +} + +/* Search highlight */ +.d3-flame-graph rect.search-match { + stroke: #ff6b35 !important; + stroke-width: 2px !important; + stroke-dasharray: 4 2; +} + +.d3-flame-graph rect.search-dim { + opacity: 0.25; +} + +/* -------------------------------------------------------------------------- + Tooltip + -------------------------------------------------------------------------- */ + +.python-tooltip { + position: absolute; + z-index: 1000; + pointer-events: none; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + padding: 14px; + max-width: 480px; + box-shadow: var(--shadow-lg); + font-family: var(--font-sans); + font-size: 13px; + color: var(--text-primary); + word-wrap: break-word; + overflow-wrap: break-word; + line-height: 1.5; +} + +.tooltip-header { + margin-bottom: 10px; +} + +.tooltip-title { + font-size: 14px; + font-weight: 600; + color: var(--accent); + line-height: 1.3; + word-break: break-word; + margin-bottom: 4px; +} + +.tooltip-location { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-muted); + background: var(--bg-tertiary); + padding: 4px 8px; + border-radius: 4px; + word-break: break-all; +} + +.tooltip-stats { + display: grid; + grid-template-columns: auto 1fr; + gap: 4px 14px; + font-size: 12px; +} + +.tooltip-stat-label { + color: var(--text-secondary); + font-weight: 500; +} + +.tooltip-stat-value { + color: var(--text-primary); + font-weight: 600; +} + +.tooltip-stat-value.accent { + color: var(--accent); +} + +.tooltip-source { + margin-top: 10px; + padding-top: 10px; + border-top: 1px solid var(--border); +} + +.tooltip-source-title { + font-size: 11px; + font-weight: 600; + color: var(--accent); + margin-bottom: 6px; +} + +.tooltip-source-code { + font-family: var(--font-mono); + font-size: 10px; + line-height: 1.5; + background: var(--bg-tertiary); + border: 1px solid var(--border); + border-radius: 6px; + padding: 8px; + max-height: 140px; + overflow-y: auto; + white-space: pre-wrap; + word-break: break-all; +} + +.tooltip-source-line { + color: var(--text-secondary); + padding: 1px 0; +} + +.tooltip-source-line.current { + color: var(--accent); + font-weight: 600; +} + +.tooltip-hint { + margin-top: 10px; + padding-top: 8px; + border-top: 1px solid var(--border); + font-size: 11px; + color: var(--text-muted); + text-align: center; +} + +/* -------------------------------------------------------------------------- + Responsive (Flamegraph-specific) + -------------------------------------------------------------------------- */ + +@media (max-width: 900px) { + .sidebar { + position: fixed; + left: 0; + top: var(--topbar-height); + bottom: var(--statusbar-height); + z-index: 100; + box-shadow: var(--shadow-lg); + } + + .sidebar.collapsed { + width: var(--sidebar-collapsed); + } + + .search-wrapper { + max-width: 220px; + } +} + +@media (max-width: 600px) { + .search-wrapper { + max-width: 160px; + } + + .brand-info { + display: none; + } + + .stats-grid { + grid-template-columns: 1fr; + } +} diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js new file mode 100644 index 00000000000..494d156a8dd --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph.js @@ -0,0 +1,1012 @@ +const EMBEDDED_DATA = {{FLAMEGRAPH_DATA}}; + +// Global string table for resolving string indices +let stringTable = []; +let originalData = null; +let currentThreadFilter = 'all'; + +// Heat colors are now defined in CSS variables (--heat-1 through --heat-8) +// and automatically switch with theme changes - no JS color arrays needed! + +// ============================================================================ +// String Resolution +// ============================================================================ + +function resolveString(index) { + if (index === null || index === undefined) { + return null; + } + if (typeof index === 'number' && index >= 0 && index < stringTable.length) { + return stringTable[index]; + } + return String(index); +} + +function resolveStringIndices(node) { + if (!node) return node; + + const resolved = { ...node }; + + if (typeof resolved.name === 'number') { + resolved.name = resolveString(resolved.name); + } + if (typeof resolved.filename === 'number') { + resolved.filename = resolveString(resolved.filename); + } + if (typeof resolved.funcname === 'number') { + resolved.funcname = resolveString(resolved.funcname); + } + + if (Array.isArray(resolved.source)) { + resolved.source = resolved.source.map(index => + typeof index === 'number' ? resolveString(index) : index + ); + } + + if (Array.isArray(resolved.children)) { + resolved.children = resolved.children.map(child => resolveStringIndices(child)); + } + + return resolved; +} + +// ============================================================================ +// Theme & UI Controls +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('flamegraph-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } + + // Re-render flamegraph with new theme colors + if (window.flamegraphData && originalData) { + const tooltip = createPythonTooltip(originalData); + const chart = createFlamegraph(tooltip, originalData.value); + renderFlamegraph(chart, window.flamegraphData); + } +} + +function toggleSidebar() { + const sidebar = document.getElementById('sidebar'); + if (sidebar) { + const isCollapsing = !sidebar.classList.contains('collapsed'); + + if (isCollapsing) { + // Save current width before collapsing + const currentWidth = sidebar.offsetWidth; + sidebar.dataset.expandedWidth = currentWidth; + localStorage.setItem('flamegraph-sidebar-width', currentWidth); + } else { + // Restore width when expanding + const savedWidth = sidebar.dataset.expandedWidth || localStorage.getItem('flamegraph-sidebar-width'); + if (savedWidth) { + sidebar.style.width = savedWidth + 'px'; + } + } + + sidebar.classList.toggle('collapsed'); + localStorage.setItem('flamegraph-sidebar', sidebar.classList.contains('collapsed') ? 'collapsed' : 'expanded'); + + // Resize chart after sidebar animation + setTimeout(() => { + resizeChart(); + }, 300); + } +} + +function resizeChart() { + if (window.flamegraphChart && window.flamegraphData) { + const chartArea = document.querySelector('.chart-area'); + if (chartArea) { + window.flamegraphChart.width(chartArea.clientWidth - 32); + d3.select("#chart").datum(window.flamegraphData).call(window.flamegraphChart); + } + } +} + +function toggleSection(sectionId) { + const section = document.getElementById(sectionId); + if (section) { + section.classList.toggle('collapsed'); + // Save state + const collapsedSections = JSON.parse(localStorage.getItem('flamegraph-collapsed-sections') || '{}'); + collapsedSections[sectionId] = section.classList.contains('collapsed'); + localStorage.setItem('flamegraph-collapsed-sections', JSON.stringify(collapsedSections)); + } +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('flamegraph-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? '☼' : '☾'; + } + } + + // Restore sidebar state + const savedSidebar = localStorage.getItem('flamegraph-sidebar'); + if (savedSidebar === 'collapsed') { + const sidebar = document.getElementById('sidebar'); + if (sidebar) sidebar.classList.add('collapsed'); + } + + // Restore sidebar width + const savedWidth = localStorage.getItem('flamegraph-sidebar-width'); + if (savedWidth) { + const sidebar = document.getElementById('sidebar'); + if (sidebar) { + sidebar.style.width = savedWidth + 'px'; + } + } + + // Restore collapsed sections + const collapsedSections = JSON.parse(localStorage.getItem('flamegraph-collapsed-sections') || '{}'); + for (const [sectionId, isCollapsed] of Object.entries(collapsedSections)) { + if (isCollapsed) { + const section = document.getElementById(sectionId); + if (section) section.classList.add('collapsed'); + } + } +} + +// ============================================================================ +// Status Bar +// ============================================================================ + +function updateStatusBar(nodeData, rootValue) { + const funcname = resolveString(nodeData.funcname) || resolveString(nodeData.name) || "--"; + const filename = resolveString(nodeData.filename) || ""; + const lineno = nodeData.lineno; + const timeMs = (nodeData.value / 1000).toFixed(2); + const percent = rootValue > 0 ? ((nodeData.value / rootValue) * 100).toFixed(1) : "0.0"; + + const locationEl = document.getElementById('status-location'); + const funcItem = document.getElementById('status-func-item'); + const timeItem = document.getElementById('status-time-item'); + const percentItem = document.getElementById('status-percent-item'); + + if (locationEl) locationEl.style.display = filename && filename !== "~" ? 'flex' : 'none'; + if (funcItem) funcItem.style.display = 'flex'; + if (timeItem) timeItem.style.display = 'flex'; + if (percentItem) percentItem.style.display = 'flex'; + + const fileEl = document.getElementById('status-file'); + if (fileEl && filename && filename !== "~") { + const basename = filename.split('/').pop(); + fileEl.textContent = lineno ? `${basename}:${lineno}` : basename; + } + + const funcEl = document.getElementById('status-func'); + if (funcEl) funcEl.textContent = funcname.length > 40 ? funcname.substring(0, 37) + '...' : funcname; + + const timeEl = document.getElementById('status-time'); + if (timeEl) timeEl.textContent = `${timeMs} ms`; + + const percentEl = document.getElementById('status-percent'); + if (percentEl) percentEl.textContent = `${percent}%`; +} + +function clearStatusBar() { + const ids = ['status-location', 'status-func-item', 'status-time-item', 'status-percent-item']; + ids.forEach(id => { + const el = document.getElementById(id); + if (el) el.style.display = 'none'; + }); +} + +// ============================================================================ +// Tooltip +// ============================================================================ + +function createPythonTooltip(data) { + const pythonTooltip = flamegraph.tooltip.defaultFlamegraphTooltip(); + + pythonTooltip.show = function (d, element) { + if (!this._tooltip) { + this._tooltip = d3.select("body") + .append("div") + .attr("class", "python-tooltip") + .style("opacity", 0); + } + + const timeMs = (d.data.value / 1000).toFixed(2); + const percentage = ((d.data.value / data.value) * 100).toFixed(2); + const calls = d.data.calls || 0; + const childCount = d.children ? d.children.length : 0; + const source = d.data.source; + + const funcname = resolveString(d.data.funcname) || resolveString(d.data.name); + const filename = resolveString(d.data.filename) || ""; + const isSpecialFrame = filename === "~"; + + // Build source section + let sourceSection = ""; + if (source && Array.isArray(source) && source.length > 0) { + const sourceLines = source + .map((line) => { + const isCurrent = line.startsWith("→"); + const escaped = line.replace(/&/g, "&").replace(//g, ">"); + return `
${escaped}
`; + }) + .join(""); + + sourceSection = ` +
+
Source Code:
+
${sourceLines}
+
`; + } + + const fileLocationHTML = isSpecialFrame ? "" : ` +
${filename}${d.data.lineno ? ":" + d.data.lineno : ""}
`; + + const tooltipHTML = ` +
+
${funcname}
+ ${fileLocationHTML} +
+
+ Execution Time: + ${timeMs} ms + + Percentage: + ${percentage}% + + ${calls > 0 ? ` + Function Calls: + ${calls.toLocaleString()} + ` : ''} + + ${childCount > 0 ? ` + Child Functions: + ${childCount} + ` : ''} +
+ ${sourceSection} +
+ ${childCount > 0 ? "Click to zoom into this function" : "Leaf function - no children"} +
+ `; + + // Position tooltip + const event = d3.event || window.event; + const mouseX = event.pageX || event.clientX; + const mouseY = event.pageY || event.clientY; + const padding = 12; + + this._tooltip.html(tooltipHTML); + + // Measure tooltip + const node = this._tooltip.style("display", "block").style("opacity", 0).node(); + const tooltipWidth = node.offsetWidth || 320; + const tooltipHeight = node.offsetHeight || 200; + + // Calculate position + let left = mouseX + padding; + let top = mouseY + padding; + + if (left + tooltipWidth > window.innerWidth) { + left = mouseX - tooltipWidth - padding; + if (left < 0) left = padding; + } + + if (top + tooltipHeight > window.innerHeight) { + top = mouseY - tooltipHeight - padding; + if (top < 0) top = padding; + } + + this._tooltip + .style("left", left + "px") + .style("top", top + "px") + .transition() + .duration(150) + .style("opacity", 1); + + // Update status bar + updateStatusBar(d.data, data.value); + }; + + pythonTooltip.hide = function () { + if (this._tooltip) { + this._tooltip.transition().duration(150).style("opacity", 0); + } + clearStatusBar(); + }; + + return pythonTooltip; +} + +// ============================================================================ +// Flamegraph Creation +// ============================================================================ + +function ensureLibraryLoaded() { + if (typeof flamegraph === "undefined") { + console.error("d3-flame-graph library not loaded"); + document.getElementById("chart").innerHTML = + '
Error: d3-flame-graph library failed to load
'; + throw new Error("d3-flame-graph library failed to load"); + } +} + +const HEAT_THRESHOLDS = [ + [0.6, 8], + [0.35, 7], + [0.18, 6], + [0.12, 5], + [0.06, 4], + [0.03, 3], + [0.01, 2], +]; + +function getHeatLevel(percentage) { + for (const [threshold, level] of HEAT_THRESHOLDS) { + if (percentage >= threshold) return level; + } + return 1; +} + +function getHeatColors() { + const style = getComputedStyle(document.documentElement); + const colors = {}; + for (let i = 1; i <= 8; i++) { + colors[i] = style.getPropertyValue(`--heat-${i}`).trim(); + } + return colors; +} + +function createFlamegraph(tooltip, rootValue) { + const chartArea = document.querySelector('.chart-area'); + const width = chartArea ? chartArea.clientWidth - 32 : window.innerWidth - 320; + const heatColors = getHeatColors(); + + let chart = flamegraph() + .width(width) + .cellHeight(20) + .transitionDuration(300) + .minFrameSize(1) + .tooltip(tooltip) + .inverted(true) + .setColorMapper(function (d) { + const percentage = d.data.value / rootValue; + const level = getHeatLevel(percentage); + return heatColors[level]; + }); + + return chart; +} + +function renderFlamegraph(chart, data) { + d3.select("#chart").datum(data).call(chart); + window.flamegraphChart = chart; + window.flamegraphData = data; + populateStats(data); +} + +// ============================================================================ +// Search +// ============================================================================ + +function updateSearchHighlight(searchTerm, searchInput) { + d3.selectAll("#chart rect") + .classed("search-match", false) + .classed("search-dim", false); + + // Clear active state from all hotspots + document.querySelectorAll('.hotspot').forEach(h => h.classList.remove('active')); + + if (searchTerm && searchTerm.length > 0) { + let matchCount = 0; + + d3.selectAll("#chart rect").each(function (d) { + if (d && d.data) { + const name = resolveString(d.data.name) || ""; + const funcname = resolveString(d.data.funcname) || ""; + const filename = resolveString(d.data.filename) || ""; + const lineno = d.data.lineno; + const term = searchTerm.toLowerCase(); + + // Check if search term looks like file:line pattern + const fileLineMatch = term.match(/^(.+):(\d+)$/); + let matches = false; + + if (fileLineMatch) { + // Exact file:line matching + const searchFile = fileLineMatch[1]; + const searchLine = parseInt(fileLineMatch[2], 10); + const basename = filename.split('/').pop().toLowerCase(); + matches = basename.includes(searchFile) && lineno === searchLine; + } else { + // Regular substring search + matches = + name.toLowerCase().includes(term) || + funcname.toLowerCase().includes(term) || + filename.toLowerCase().includes(term); + } + + if (matches) { + matchCount++; + d3.select(this).classed("search-match", true); + } else { + d3.select(this).classed("search-dim", true); + } + } + }); + + if (searchInput) { + searchInput.classList.remove("has-matches", "no-matches"); + searchInput.classList.add(matchCount > 0 ? "has-matches" : "no-matches"); + } + + // Mark matching hotspot as active + document.querySelectorAll('.hotspot').forEach(h => { + if (h.dataset.searchterm && h.dataset.searchterm.toLowerCase() === searchTerm.toLowerCase()) { + h.classList.add('active'); + } + }); + } else if (searchInput) { + searchInput.classList.remove("has-matches", "no-matches"); + } +} + +function searchForHotspot(funcname) { + const searchInput = document.getElementById('search-input'); + const searchWrapper = document.querySelector('.search-wrapper'); + if (searchInput) { + // Toggle: if already searching for this term, clear it + if (searchInput.value.trim() === funcname) { + clearSearch(); + } else { + searchInput.value = funcname; + if (searchWrapper) { + searchWrapper.classList.add('has-value'); + } + performSearch(); + } + } +} + +function initSearchHandlers() { + const searchInput = document.getElementById("search-input"); + const searchWrapper = document.querySelector(".search-wrapper"); + if (!searchInput) return; + + let searchTimeout; + function performSearch() { + const term = searchInput.value.trim(); + updateSearchHighlight(term, searchInput); + // Toggle has-value class for clear button visibility + if (searchWrapper) { + searchWrapper.classList.toggle("has-value", term.length > 0); + } + } + + searchInput.addEventListener("input", function () { + clearTimeout(searchTimeout); + searchTimeout = setTimeout(performSearch, 150); + }); + + window.performSearch = performSearch; +} + +function clearSearch() { + const searchInput = document.getElementById("search-input"); + const searchWrapper = document.querySelector(".search-wrapper"); + if (searchInput) { + searchInput.value = ""; + searchInput.classList.remove("has-matches", "no-matches"); + if (searchWrapper) { + searchWrapper.classList.remove("has-value"); + } + // Clear highlights + d3.selectAll("#chart rect") + .classed("search-match", false) + .classed("search-dim", false); + // Clear active hotspot + document.querySelectorAll('.hotspot').forEach(h => h.classList.remove('active')); + } +} + +// ============================================================================ +// Resize Handler +// ============================================================================ + +function handleResize() { + let resizeTimeout; + window.addEventListener("resize", function () { + clearTimeout(resizeTimeout); + resizeTimeout = setTimeout(resizeChart, 100); + }); +} + +function initSidebarResize() { + const sidebar = document.getElementById('sidebar'); + const resizeHandle = document.getElementById('sidebar-resize-handle'); + if (!sidebar || !resizeHandle) return; + + let isResizing = false; + let startX = 0; + let startWidth = 0; + const minWidth = 200; + const maxWidth = 600; + + resizeHandle.addEventListener('mousedown', function(e) { + isResizing = true; + startX = e.clientX; + startWidth = sidebar.offsetWidth; + resizeHandle.classList.add('resizing'); + document.body.classList.add('resizing-sidebar'); + e.preventDefault(); + }); + + document.addEventListener('mousemove', function(e) { + if (!isResizing) return; + + const deltaX = e.clientX - startX; + const newWidth = Math.min(Math.max(startWidth + deltaX, minWidth), maxWidth); + sidebar.style.width = newWidth + 'px'; + e.preventDefault(); + }); + + document.addEventListener('mouseup', function() { + if (isResizing) { + isResizing = false; + resizeHandle.classList.remove('resizing'); + document.body.classList.remove('resizing-sidebar'); + + // Save the new width + const width = sidebar.offsetWidth; + localStorage.setItem('flamegraph-sidebar-width', width); + + // Resize chart after sidebar resize + setTimeout(() => { + resizeChart(); + }, 10); + } + }); +} + +// ============================================================================ +// Thread Stats +// ============================================================================ + +// Mode constants (must match constants.py) +const PROFILING_MODE_WALL = 0; +const PROFILING_MODE_CPU = 1; +const PROFILING_MODE_GIL = 2; +const PROFILING_MODE_ALL = 3; + +function populateThreadStats(data, selectedThreadId = null) { + const stats = data?.stats; + if (!stats || !stats.thread_stats) { + return; + } + + const mode = stats.mode !== undefined ? stats.mode : PROFILING_MODE_WALL; + let threadStats; + + if (selectedThreadId !== null && stats.per_thread_stats && stats.per_thread_stats[selectedThreadId]) { + threadStats = stats.per_thread_stats[selectedThreadId]; + } else { + threadStats = stats.thread_stats; + } + + if (!threadStats || typeof threadStats.total !== 'number' || threadStats.total <= 0) { + return; + } + + const section = document.getElementById('thread-stats-bar'); + if (!section) { + return; + } + + section.style.display = 'block'; + + const gilHeldStat = document.getElementById('gil-held-stat'); + const gilReleasedStat = document.getElementById('gil-released-stat'); + const gilWaitingStat = document.getElementById('gil-waiting-stat'); + + if (mode === PROFILING_MODE_GIL) { + // In GIL mode, hide GIL-related stats + if (gilHeldStat) gilHeldStat.style.display = 'none'; + if (gilReleasedStat) gilReleasedStat.style.display = 'none'; + if (gilWaitingStat) gilWaitingStat.style.display = 'none'; + } else { + // Show all stats + if (gilHeldStat) gilHeldStat.style.display = 'block'; + if (gilReleasedStat) gilReleasedStat.style.display = 'block'; + if (gilWaitingStat) gilWaitingStat.style.display = 'block'; + + const gilHeldPctElem = document.getElementById('gil-held-pct'); + if (gilHeldPctElem) gilHeldPctElem.textContent = `${(threadStats.has_gil_pct || 0).toFixed(1)}%`; + + const gilReleasedPctElem = document.getElementById('gil-released-pct'); + // GIL Released = not holding GIL and not waiting for it + const gilReleasedPct = Math.max(0, 100 - (threadStats.has_gil_pct || 0) - (threadStats.gil_requested_pct || 0)); + if (gilReleasedPctElem) gilReleasedPctElem.textContent = `${gilReleasedPct.toFixed(1)}%`; + + const gilWaitingPctElem = document.getElementById('gil-waiting-pct'); + if (gilWaitingPctElem) gilWaitingPctElem.textContent = `${(threadStats.gil_requested_pct || 0).toFixed(1)}%`; + } + + const gcPctElem = document.getElementById('gc-pct'); + if (gcPctElem) gcPctElem.textContent = `${(threadStats.gc_pct || 0).toFixed(1)}%`; +} + +// ============================================================================ +// Profile Summary Stats +// ============================================================================ + +function formatNumber(num) { + if (num >= 1000000) return (num / 1000000).toFixed(1) + 'M'; + if (num >= 1000) return (num / 1000).toFixed(1) + 'K'; + return num.toLocaleString(); +} + +function formatDuration(seconds) { + if (seconds >= 3600) { + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + return `${h}h ${m}m`; + } + if (seconds >= 60) { + const m = Math.floor(seconds / 60); + const s = Math.floor(seconds % 60); + return `${m}m ${s}s`; + } + return seconds.toFixed(2) + 's'; +} + +function populateProfileSummary(data) { + const stats = data.stats || {}; + const totalSamples = stats.total_samples || data.value || 0; + const duration = stats.duration_sec || 0; + const sampleRate = stats.sample_rate || (duration > 0 ? totalSamples / duration : 0); + const errorRate = stats.error_rate || 0; + const missedSamples= stats.missed_samples || 0; + + const samplesEl = document.getElementById('stat-total-samples'); + if (samplesEl) samplesEl.textContent = formatNumber(totalSamples); + + const durationEl = document.getElementById('stat-duration'); + if (durationEl) durationEl.textContent = duration > 0 ? formatDuration(duration) : '--'; + + const rateEl = document.getElementById('stat-sample-rate'); + if (rateEl) rateEl.textContent = sampleRate > 0 ? formatNumber(Math.round(sampleRate)) : '--'; + + // Count unique functions + let functionCount = 0; + function countFunctions(node) { + if (!node) return; + functionCount++; + if (node.children) node.children.forEach(countFunctions); + } + countFunctions(data); + + const functionsEl = document.getElementById('stat-functions'); + if (functionsEl) functionsEl.textContent = formatNumber(functionCount); + + // Efficiency bar + if (errorRate !== undefined && errorRate !== null) { + const efficiency = Math.max(0, Math.min(100, (100 - errorRate))); + + const efficiencySection = document.getElementById('efficiency-section'); + if (efficiencySection) efficiencySection.style.display = 'block'; + + const efficiencyValue = document.getElementById('stat-efficiency'); + if (efficiencyValue) efficiencyValue.textContent = efficiency.toFixed(1) + '%'; + + const efficiencyFill = document.getElementById('efficiency-fill'); + if (efficiencyFill) efficiencyFill.style.width = efficiency + '%'; + } + // MissedSamples bar + if (missedSamples !== undefined && missedSamples !== null) { + const sampleEfficiency = Math.max(0, missedSamples); + + const efficiencySection = document.getElementById('efficiency-section'); + if (efficiencySection) efficiencySection.style.display = 'block'; + + const sampleEfficiencyValue = document.getElementById('stat-missed-samples'); + if (sampleEfficiencyValue) sampleEfficiencyValue.textContent = sampleEfficiency.toFixed(1) + '%'; + + const sampleEfficiencyFill = document.getElementById('missed-samples-fill'); + if (sampleEfficiencyFill) sampleEfficiencyFill.style.width = sampleEfficiency + '%'; + } +} + +// ============================================================================ +// Hotspot Stats +// ============================================================================ + +function populateStats(data) { + const totalSamples = data.value || 0; + + // Populate profile summary + populateProfileSummary(data); + + // Populate thread statistics if available + populateThreadStats(data); + + const functionMap = new Map(); + + function collectFunctions(node) { + if (!node) return; + + let filename = resolveString(node.filename); + let funcname = resolveString(node.funcname); + + if (!filename || !funcname) { + const nameStr = resolveString(node.name); + if (nameStr?.includes('(')) { + const match = nameStr.match(/^(.+?)\s*\((.+?):(\d+)\)$/); + if (match) { + funcname = funcname || match[1]; + filename = filename || match[2]; + } + } + } + + filename = filename || 'unknown'; + funcname = funcname || 'unknown'; + + if (filename !== 'unknown' && funcname !== 'unknown' && node.value > 0) { + let childrenValue = 0; + if (node.children) { + childrenValue = node.children.reduce((sum, child) => sum + child.value, 0); + } + const directSamples = Math.max(0, node.value - childrenValue); + + const funcKey = `${filename}:${node.lineno || '?'}:${funcname}`; + + if (functionMap.has(funcKey)) { + const existing = functionMap.get(funcKey); + existing.directSamples += directSamples; + existing.directPercent = (existing.directSamples / totalSamples) * 100; + if (directSamples > existing.maxSingleSamples) { + existing.filename = filename; + existing.lineno = node.lineno || '?'; + existing.maxSingleSamples = directSamples; + } + } else { + functionMap.set(funcKey, { + filename: filename, + lineno: node.lineno || '?', + funcname: funcname, + directSamples, + directPercent: (directSamples / totalSamples) * 100, + maxSingleSamples: directSamples + }); + } + } + + if (node.children) { + node.children.forEach(child => collectFunctions(child)); + } + } + + collectFunctions(data); + + const hotSpots = Array.from(functionMap.values()) + .filter(f => f.directPercent > 0.5) + .sort((a, b) => b.directPercent - a.directPercent) + .slice(0, 3); + + // Populate and animate hotspot cards + for (let i = 0; i < 3; i++) { + const num = i + 1; + const card = document.getElementById(`hotspot-${num}`); + const funcEl = document.getElementById(`hotspot-func-${num}`); + const fileEl = document.getElementById(`hotspot-file-${num}`); + const percentEl = document.getElementById(`hotspot-percent-${num}`); + const samplesEl = document.getElementById(`hotspot-samples-${num}`); + + if (i < hotSpots.length && hotSpots[i]) { + const h = hotSpots[i]; + const filename = h.filename || 'unknown'; + const lineno = h.lineno ?? '?'; + const isSpecialFrame = filename === '~' && (lineno === 0 || lineno === '?'); + + let funcDisplay = h.funcname || 'unknown'; + if (funcDisplay.length > 28) funcDisplay = funcDisplay.substring(0, 25) + '...'; + + if (funcEl) funcEl.textContent = funcDisplay; + if (fileEl) { + if (isSpecialFrame) { + fileEl.textContent = '--'; + } else { + const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown'; + fileEl.textContent = `${basename}:${lineno}`; + } + } + if (percentEl) percentEl.textContent = `${h.directPercent.toFixed(1)}%`; + if (samplesEl) samplesEl.textContent = ` (${h.directSamples.toLocaleString()})`; + } else { + if (funcEl) funcEl.textContent = '--'; + if (fileEl) fileEl.textContent = '--'; + if (percentEl) percentEl.textContent = '--'; + if (samplesEl) samplesEl.textContent = ''; + } + + // Add click handler and animate entrance + if (card) { + if (i < hotSpots.length && hotSpots[i]) { + const h = hotSpots[i]; + const basename = h.filename !== 'unknown' ? h.filename.split('/').pop() : ''; + const searchTerm = basename && h.lineno !== '?' ? `${basename}:${h.lineno}` : h.funcname; + card.dataset.searchterm = searchTerm; + card.onclick = () => searchForHotspot(searchTerm); + card.style.cursor = 'pointer'; + } else { + card.onclick = null; + delete card.dataset.searchterm; + card.style.cursor = 'default'; + } + + setTimeout(() => { + card.classList.add('visible'); + }, 100 + i * 80); + } + } +} + +// ============================================================================ +// Thread Filter +// ============================================================================ + +function initThreadFilter(data) { + const threadFilter = document.getElementById('thread-filter'); + const threadSection = document.getElementById('thread-section'); + + if (!threadFilter || !data.threads) return; + + threadFilter.innerHTML = ''; + + const threads = data.threads || []; + threads.forEach(threadId => { + const option = document.createElement('option'); + option.value = threadId; + option.textContent = `Thread ${threadId}`; + threadFilter.appendChild(option); + }); + + if (threads.length > 1 && threadSection) { + threadSection.style.display = 'block'; + } +} + +function filterByThread() { + const threadFilter = document.getElementById('thread-filter'); + if (!threadFilter || !originalData) return; + + const selectedThread = threadFilter.value; + currentThreadFilter = selectedThread; + + let filteredData; + let selectedThreadId = null; + + if (selectedThread === 'all') { + filteredData = originalData; + } else { + selectedThreadId = parseInt(selectedThread, 10); + filteredData = filterDataByThread(originalData, selectedThreadId); + + if (filteredData.strings) { + stringTable = filteredData.strings; + filteredData = resolveStringIndices(filteredData); + } + } + + const tooltip = createPythonTooltip(filteredData); + const chart = createFlamegraph(tooltip, filteredData.value); + renderFlamegraph(chart, filteredData); + + populateThreadStats(originalData, selectedThreadId); +} + +function filterDataByThread(data, threadId) { + function filterNode(node) { + if (!node.threads || !node.threads.includes(threadId)) { + return null; + } + + const filteredNode = { ...node, children: [] }; + + if (node.children && Array.isArray(node.children)) { + filteredNode.children = node.children + .map(child => filterNode(child)) + .filter(child => child !== null); + } + + return filteredNode; + } + + function recalculateValue(node) { + if (!node.children || node.children.length === 0) { + return node.value || 0; + } + const childrenValue = node.children.reduce((sum, child) => sum + recalculateValue(child), 0); + node.value = Math.max(node.value || 0, childrenValue); + return node.value; + } + + const filteredRoot = { ...data, children: [] }; + + if (data.children && Array.isArray(data.children)) { + filteredRoot.children = data.children + .map(child => filterNode(child)) + .filter(child => child !== null); + } + + recalculateValue(filteredRoot); + return filteredRoot; +} + +// ============================================================================ +// Control Functions +// ============================================================================ + +function resetZoom() { + if (window.flamegraphChart) { + window.flamegraphChart.resetZoom(); + } +} + +function exportSVG() { + const svgElement = document.querySelector("#chart svg"); + if (!svgElement) { + console.warn("Cannot export: No flamegraph SVG found"); + return; + } + const serializer = new XMLSerializer(); + const svgString = serializer.serializeToString(svgElement); + const blob = new Blob([svgString], { type: "image/svg+xml" }); + const url = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = url; + a.download = "python-performance-flamegraph.svg"; + a.click(); + URL.revokeObjectURL(url); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +function initFlamegraph() { + ensureLibraryLoaded(); + restoreUIState(); + + let processedData = EMBEDDED_DATA; + if (EMBEDDED_DATA.strings) { + stringTable = EMBEDDED_DATA.strings; + processedData = resolveStringIndices(EMBEDDED_DATA); + } + + originalData = processedData; + initThreadFilter(processedData); + + const tooltip = createPythonTooltip(processedData); + const chart = createFlamegraph(tooltip, processedData.value); + renderFlamegraph(chart, processedData); + initSearchHandlers(); + initSidebarResize(); + handleResize(); +} + +if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", initFlamegraph); +} else { + initFlamegraph(); +} diff --git a/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html new file mode 100644 index 00000000000..82102c229e7 --- /dev/null +++ b/Lib/profiling/sampling/_flamegraph_assets/flamegraph_template.html @@ -0,0 +1,309 @@ + + + + + + Tachyon Profiler - Flamegraph + + + + + + + +
+ +
+
+ Tachyon + + Profiler +
+
+ + +
+
+ + + +
+
+ + +
+ + + + +
+
+
+
+ + +
+ + + + +
+
+ + + + diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.css b/Lib/profiling/sampling/_heatmap_assets/heatmap.css new file mode 100644 index 00000000000..44915b2a2da --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.css @@ -0,0 +1,1146 @@ +/* ========================================================================== + Heatmap Viewer - Component-Specific CSS + + DEPENDENCY: Requires _shared_assets/base.css to be loaded first + This file extends the shared foundation with heatmap-specific styles. + ========================================================================== */ + +/* -------------------------------------------------------------------------- + Layout Overrides (Heatmap-specific) + -------------------------------------------------------------------------- */ + +.app-layout { + min-height: 100vh; +} + +/* Sticky top bar for heatmap views */ +.top-bar { + position: sticky; + top: 0; + z-index: 100; +} + +/* Back link in toolbar */ +.back-link { + color: white; + text-decoration: none; + padding: 6px 14px; + background: rgba(255, 255, 255, 0.12); + border: 1px solid rgba(255, 255, 255, 0.18); + border-radius: 6px; + font-size: 13px; + font-weight: 500; + transition: all var(--transition-fast); +} + +.back-link:hover { + background: rgba(255, 255, 255, 0.22); + border-color: rgba(255, 255, 255, 0.35); +} + +/* -------------------------------------------------------------------------- + Main Content Area + -------------------------------------------------------------------------- */ + +.main-content { + flex: 1; + padding: 24px 3%; + width: 100%; + max-width: 100%; +} + +/* -------------------------------------------------------------------------- + Stats Summary Cards - Enhanced with Icons & Animations + -------------------------------------------------------------------------- */ + +.stats-summary { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 12px; + margin-bottom: 24px; +} + +.stat-card { + display: flex; + align-items: center; + gap: 12px; + background: var(--bg-primary); + border: 2px solid var(--border); + border-radius: 10px; + padding: 14px 16px; + transition: all var(--transition-fast); + animation: slideUp 0.5s ease-out backwards; + animation-delay: calc(var(--i, 0) * 0.08s); + position: relative; + overflow: hidden; +} + +.stat-card::before { + content: ''; + position: absolute; + top: 0; + left: 0; + right: 0; + height: 3px; + background: linear-gradient(90deg, var(--python-blue), var(--python-gold)); + opacity: 0; + transition: opacity var(--transition-fast); +} + +.stat-card:nth-child(1) { --i: 0; --card-color: 55, 118, 171; } +.stat-card:nth-child(2) { --i: 1; --card-color: 40, 167, 69; } +.stat-card:nth-child(3) { --i: 2; --card-color: 255, 193, 7; } +.stat-card:nth-child(4) { --i: 3; --card-color: 111, 66, 193; } +.stat-card:nth-child(5) { --i: 4; --card-color: 220, 53, 69; } +.stat-card:nth-child(6) { --i: 5; --card-color: 23, 162, 184; } + +.stat-card:hover { + border-color: rgba(var(--card-color), 0.6); + background: linear-gradient(135deg, rgba(var(--card-color), 0.08) 0%, var(--bg-primary) 100%); + transform: translateY(-2px); + box-shadow: 0 4px 16px rgba(var(--card-color), 0.15); +} + +.stat-card:hover::before { + opacity: 1; +} + +.stat-icon { + width: 40px; + height: 40px; + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + background: linear-gradient(135deg, rgba(var(--card-color), 0.15) 0%, rgba(var(--card-color), 0.05) 100%); + border: 1px solid rgba(var(--card-color), 0.2); + border-radius: 10px; + flex-shrink: 0; + transition: all var(--transition-fast); +} + +.stat-card:hover .stat-icon { + transform: scale(1.05) rotate(-2deg); + background: linear-gradient(135deg, rgba(var(--card-color), 0.25) 0%, rgba(var(--card-color), 0.1) 100%); +} + +.stat-data { + flex: 1; + min-width: 0; +} + +.stat-value { + font-family: var(--font-mono); + font-size: 1.35em; + font-weight: 800; + color: rgb(var(--card-color)); + display: block; + line-height: 1.1; + letter-spacing: -0.3px; +} + +.stat-label { + font-size: 10px; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.3px; + margin-top: 2px; +} + +/* Sparkline decoration for stats */ +.stat-sparkline { + position: absolute; + bottom: 0; + left: 0; + right: 0; + height: 30px; + opacity: 0.1; + background: linear-gradient(180deg, + transparent 0%, + rgba(var(--card-color), 0.3) 100% + ); + pointer-events: none; +} + +/* -------------------------------------------------------------------------- + Rate Cards (Error Rate, Missed Samples) with Progress Bars + -------------------------------------------------------------------------- */ + +.rate-card { + display: flex; + flex-direction: column; + gap: 12px; + background: var(--bg-primary); + border: 2px solid var(--border); + border-radius: 12px; + padding: 18px 20px; + transition: all var(--transition-fast); + animation: slideUp 0.5s ease-out backwards; + position: relative; + overflow: hidden; +} + +.rate-card:nth-child(5) { animation-delay: 0.32s; --rate-color: 220, 53, 69; } +.rate-card:nth-child(6) { animation-delay: 0.40s; --rate-color: 255, 152, 0; } + +.rate-card:hover { + border-color: rgba(var(--rate-color), 0.5); + transform: translateY(-2px); + box-shadow: 0 6px 20px rgba(var(--rate-color), 0.15); +} + +.rate-header { + display: flex; + justify-content: space-between; + align-items: center; +} + +.rate-info { + display: flex; + align-items: center; + gap: 10px; +} + +.rate-icon { + width: 36px; + height: 36px; + display: flex; + align-items: center; + justify-content: center; + font-size: 18px; + background: linear-gradient(135deg, rgba(var(--rate-color), 0.15) 0%, rgba(var(--rate-color), 0.05) 100%); + border: 1px solid rgba(var(--rate-color), 0.2); + border-radius: 10px; + flex-shrink: 0; +} + +.rate-label { + font-size: 12px; + font-weight: 600; + color: var(--text-secondary); + text-transform: uppercase; + letter-spacing: 0.3px; +} + +.rate-value { + font-family: var(--font-mono); + font-size: 1.4em; + font-weight: 800; + color: rgb(var(--rate-color)); +} + +.rate-bar { + height: 8px; + background: var(--bg-tertiary); + border-radius: 4px; + overflow: hidden; + position: relative; +} + +.rate-fill { + height: 100%; + border-radius: 4px; + transition: width 0.8s ease-out; + position: relative; + overflow: hidden; +} + +.rate-fill.error { + background: linear-gradient(90deg, #dc3545 0%, #ff6b6b 100%); +} + +.rate-fill.warning { + background: linear-gradient(90deg, #ff9800 0%, #ffc107 100%); +} + +.rate-fill.good { + background: linear-gradient(90deg, #28a745 0%, #20c997 100%); +} + +/* Shimmer animation on rate bars */ +.rate-fill::after { + content: ''; + position: absolute; + top: 0; + left: -100%; + width: 100%; + height: 100%; + background: linear-gradient( + 90deg, + transparent 0%, + rgba(255, 255, 255, 0.4) 50%, + transparent 100% + ); + animation: shimmer 2.5s ease-in-out infinite; +} + +/* -------------------------------------------------------------------------- + Section Headers + -------------------------------------------------------------------------- */ + +.section-header { + display: flex; + align-items: center; + gap: 12px; + margin-bottom: 16px; + padding-bottom: 12px; + border-bottom: 2px solid var(--python-gold); +} + +.section-title { + font-size: 18px; + font-weight: 700; + color: var(--text-primary); + margin: 0; + flex: 1; +} + +/* -------------------------------------------------------------------------- + Filter Controls + -------------------------------------------------------------------------- */ + +.filter-controls { + display: flex; + gap: 8px; + flex-wrap: wrap; + align-items: center; + margin-bottom: 16px; +} + +.control-btn { + padding: 8px 16px; + background: var(--bg-secondary); + color: var(--text-primary); + border: 1px solid var(--border); + border-radius: 6px; + font-size: 13px; + font-weight: 500; + cursor: pointer; + transition: all var(--transition-fast); +} + +.control-btn:hover { + background: var(--accent); + color: white; + border-color: var(--accent); +} + +/* -------------------------------------------------------------------------- + Type Sections (stdlib, project, etc) + -------------------------------------------------------------------------- */ + +.type-section { + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + overflow: hidden; + margin-bottom: 12px; +} + +.type-header { + padding: 12px 16px; + background: var(--header-gradient); + color: white; + cursor: pointer; + display: flex; + align-items: center; + gap: 10px; + user-select: none; + transition: all var(--transition-fast); + font-weight: 600; +} + +.type-header:hover { + opacity: 0.95; +} + +.type-icon { + font-size: 12px; + transition: transform var(--transition-fast); + min-width: 12px; +} + +.type-title { + font-size: 14px; + flex: 1; +} + +.type-stats { + font-size: 12px; + opacity: 0.9; + background: rgba(255, 255, 255, 0.15); + padding: 4px 10px; + border-radius: 4px; + font-family: var(--font-mono); +} + +.type-content { + padding: 12px; +} + +/* -------------------------------------------------------------------------- + Folder Nodes (hierarchical structure) + -------------------------------------------------------------------------- */ + +.folder-node { + margin-bottom: 6px; +} + +.folder-header { + padding: 8px 12px; + background: var(--bg-secondary); + border: 1px solid var(--border); + border-radius: 6px; + cursor: pointer; + display: flex; + align-items: center; + gap: 8px; + user-select: none; + transition: all var(--transition-fast); +} + +.folder-header:hover { + background: var(--accent-glow); + border-color: var(--accent); +} + +.folder-icon { + font-size: 10px; + color: var(--accent); + transition: transform var(--transition-fast); + min-width: 12px; +} + +.folder-name { + flex: 1; + font-weight: 500; + color: var(--text-primary); + font-size: 13px; +} + +.folder-stats { + font-size: 11px; + color: var(--text-secondary); + background: var(--bg-tertiary); + padding: 2px 8px; + border-radius: 4px; + font-family: var(--font-mono); +} + +.folder-content { + padding-left: 20px; + margin-top: 6px; +} + +/* -------------------------------------------------------------------------- + File Items + -------------------------------------------------------------------------- */ + +.files-list { + display: flex; + flex-direction: column; + gap: 4px; + margin-top: 8px; +} + +.file-item { + display: flex; + align-items: center; + gap: 12px; + padding: 8px 12px; + background: var(--bg-primary); + border: 1px solid var(--border-subtle); + border-radius: 6px; + transition: all var(--transition-fast); +} + +.file-item:hover { + background: var(--bg-secondary); + border-color: var(--border); +} + +.file-item .file-link { + flex: 1; + min-width: 0; + font-size: 13px; +} + +.file-samples { + font-size: 12px; + color: var(--text-secondary); + font-weight: 600; + white-space: nowrap; + width: 130px; + flex-shrink: 0; + text-align: right; + font-family: var(--font-mono); +} + +.heatmap-bar-container { + width: 120px; + flex-shrink: 0; + display: flex; + align-items: center; +} + +.heatmap-bar { + flex-shrink: 0; + border-radius: 2px; +} + +/* Links */ +.file-link { + color: var(--accent); + text-decoration: none; + font-weight: 500; + transition: color var(--transition-fast); +} + +.file-link:hover { + color: var(--accent-hover); + text-decoration: underline; +} + +/* -------------------------------------------------------------------------- + Module Badges + -------------------------------------------------------------------------- */ + +.module-badge { + display: inline-block; + padding: 3px 8px; + border-radius: 4px; + font-size: 11px; + font-weight: 600; +} + +.badge-stdlib { + background: rgba(40, 167, 69, 0.15); + color: #28a745; +} + +.badge-site-packages { + background: rgba(0, 123, 255, 0.15); + color: #007bff; +} + +.badge-project { + background: rgba(255, 193, 7, 0.2); + color: #d39e00; +} + +.badge-other { + background: var(--bg-tertiary); + color: var(--text-secondary); +} + +[data-theme="dark"] .badge-stdlib { + background: rgba(40, 167, 69, 0.25); + color: #5dd879; +} + +[data-theme="dark"] .badge-site-packages { + background: rgba(88, 166, 255, 0.25); + color: #79b8ff; +} + +[data-theme="dark"] .badge-project { + background: rgba(255, 212, 59, 0.25); + color: #ffd43b; +} + +/* ========================================================================== + FILE VIEW STYLES (Code Display) + ========================================================================== */ + +.code-view { + font-family: var(--font-mono); + min-height: 100vh; +} + +/* Code Header (Top Bar for file view) */ +.code-header { + height: var(--topbar-height); + background: var(--header-gradient); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + box-shadow: 0 2px 10px rgba(55, 118, 171, 0.25); + border-bottom: 2px solid var(--python-gold); + position: sticky; + top: 0; + z-index: 100; +} + +.code-header-content { + display: flex; + align-items: center; + justify-content: space-between; + width: 94%; + max-width: 100%; + margin: 0 auto; +} + +.code-header h1 { + font-size: 14px; + font-weight: 600; + color: white; + margin: 0; + font-family: var(--font-mono); + display: flex; + align-items: center; + gap: 8px; +} + +/* File Stats Bar */ +.file-stats { + background: var(--bg-secondary); + padding: 16px 24px; + border-bottom: 1px solid var(--border); +} + +.file-stats .stats-grid { + width: 94%; + max-width: 100%; + margin: 0 auto; + display: grid; + grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); + gap: 12px; +} + +.stat-item { + background: var(--bg-primary); + padding: 12px; + border-radius: 8px; + box-shadow: var(--shadow-sm); + text-align: center; + border: 1px solid var(--border); + transition: all var(--transition-fast); +} + +.stat-item:hover { + transform: translateY(-2px); + box-shadow: var(--shadow-md); + border-color: var(--accent); +} + +.stat-item .stat-value { + font-size: 1.4em; + font-weight: 700; + color: var(--accent); +} + +.stat-item .stat-label { + color: var(--text-muted); + font-size: 10px; + margin-top: 2px; +} + +/* Legend */ +.legend { + background: var(--bg-secondary); + padding: 12px 24px; + border-bottom: 1px solid var(--border); +} + +.legend-content { + width: 94%; + max-width: 100%; + margin: 0 auto; + display: flex; + align-items: center; + gap: 20px; + flex-wrap: wrap; +} + +.legend-title { + font-weight: 600; + color: var(--text-primary); + font-size: 13px; + font-family: var(--font-sans); +} + +.legend-gradient { + flex: 1; + max-width: 300px; + height: 24px; + background: linear-gradient(90deg, + var(--bg-tertiary) 0%, + var(--heat-2) 25%, + var(--heat-4) 50%, + var(--heat-6) 75%, + var(--heat-8) 100% + ); + border-radius: 4px; + border: 1px solid var(--border); +} + +.legend-labels { + display: flex; + gap: 12px; + font-size: 11px; + color: var(--text-muted); + font-family: var(--font-sans); +} + +/* Toggle Switch Styles */ +.toggle-switch { + display: inline-flex; + align-items: center; + gap: 8px; + cursor: pointer; + user-select: none; + font-family: var(--font-sans); + transition: opacity var(--transition-fast); +} + +.toggle-switch:hover { + opacity: 0.85; +} + +.toggle-switch .toggle-label { + font-size: 11px; + font-weight: 500; + color: var(--text-muted); + min-width: 55px; + text-align: right; + transition: color var(--transition-fast); +} + +.toggle-switch .toggle-label:last-child { + text-align: left; +} + +.toggle-switch .toggle-label.active { + color: var(--text-primary); + font-weight: 600; +} + +.toggle-track { + position: relative; + width: 36px; + height: 20px; + background: var(--bg-tertiary); + border: 2px solid var(--border); + border-radius: 12px; + transition: all var(--transition-fast); + box-shadow: inset var(--shadow-sm); +} + +.toggle-track:hover { + border-color: var(--text-muted); +} + +.toggle-track.on { + background: var(--accent); + border-color: var(--accent); + box-shadow: 0 0 8px var(--accent-glow); +} + +.toggle-track::after { + content: ''; + position: absolute; + top: 1px; + left: 1px; + width: 14px; + height: 14px; + background: white; + border-radius: 50%; + box-shadow: var(--shadow-sm); + transition: all var(--transition-fast); +} + +.toggle-track.on::after { + transform: translateX(16px); + box-shadow: var(--shadow-md); +} + +/* Specific toggle overrides */ +#toggle-color-mode .toggle-track.on { + background: #8e44ad; + border-color: #8e44ad; + box-shadow: 0 0 8px rgba(142, 68, 173, 0.3); +} + +#toggle-cold .toggle-track.on { + background: #e67e22; + border-color: #e67e22; + box-shadow: 0 0 8px rgba(230, 126, 34, 0.3); +} + +/* Code Container */ +.code-container { + width: 94%; + max-width: 100%; + margin: 16px auto; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px 8px 8px 8px; + box-shadow: var(--shadow-sm); + /* Allow horizontal scroll for long lines, but don't clip sticky header */ +} + +/* Code Header Row */ +.code-header-row { + position: sticky; + top: var(--topbar-height); + z-index: 50; + display: flex; + background: var(--bg-secondary); + border-bottom: 2px solid var(--border); + font-weight: 700; + font-size: 11px; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.5px; + border-radius: 8px 8px 0 0; +} + +.header-line-number { + flex-shrink: 0; + width: 60px; + padding: 8px 10px; + text-align: right; + border-right: 1px solid var(--border); +} + +.header-samples-self, +.header-samples-cumulative { + flex-shrink: 0; + width: 90px; + padding: 8px 10px; + text-align: right; + border-right: 1px solid var(--border); +} + +.header-samples-self { + color: var(--heat-8); +} + +.header-samples-cumulative { + color: var(--accent); +} + +.header-content { + flex: 1; + padding: 8px 15px; +} + +/* Code Lines */ +.code-line { + position: relative; + display: flex; + min-height: 20px; + line-height: 20px; + font-size: 13px; + transition: background var(--transition-fast); + scroll-margin-top: calc(var(--topbar-height) + 50px); +} + +.code-line:hover { + filter: brightness(0.97); +} + +[data-theme="dark"] .code-line:hover { + filter: brightness(1.1); +} + +.line-number { + flex-shrink: 0; + width: 60px; + padding: 0 10px; + text-align: right; + color: var(--text-muted); + background: var(--bg-secondary); + border-right: 1px solid var(--border); + user-select: none; + transition: all var(--transition-fast); +} + +.line-number:hover { + background: var(--accent); + color: white; + cursor: pointer; +} + +.line-samples-self, +.line-samples-cumulative { + flex-shrink: 0; + width: 90px; + padding: 0 10px; + text-align: right; + background: var(--bg-secondary); + border-right: 1px solid var(--border); + font-weight: 600; + user-select: none; + font-size: 12px; +} + +.line-samples-self { + color: var(--heat-8); +} + +.line-samples-cumulative { + color: var(--accent); +} + +.line-content { + flex: 1; + padding: 0 15px; + white-space: pre; + overflow-x: auto; +} + +/* Scrollbar Styling */ +.line-content::-webkit-scrollbar { + height: 6px; +} + +.line-content::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} + +.line-content::-webkit-scrollbar-thumb:hover { + background: var(--text-muted); +} + +/* Navigation Buttons */ +.line-nav-buttons { + position: absolute; + right: 8px; + top: 50%; + transform: translateY(-50%); + display: flex; + gap: 4px; + align-items: center; +} + +.nav-btn { + padding: 2px 6px; + font-size: 12px; + font-weight: 500; + border: 1px solid var(--accent); + border-radius: 4px; + background: var(--bg-primary); + color: var(--accent); + cursor: pointer; + transition: all var(--transition-fast); + user-select: none; + line-height: 1; +} + +.nav-btn:hover:not(:disabled) { + background: var(--accent); + color: white; + transform: translateY(-1px); + box-shadow: var(--shadow-sm); +} + +.nav-btn:active:not(:disabled) { + transform: translateY(0); +} + +.nav-btn:disabled { + opacity: 0.3; + cursor: not-allowed; + color: var(--text-muted); + background: var(--bg-secondary); + border-color: var(--border); +} + +.nav-btn.caller { + color: var(--nav-caller); + border-color: var(--nav-caller); +} + +.nav-btn.callee { + color: var(--nav-callee); + border-color: var(--nav-callee); +} + +.nav-btn.caller:hover:not(:disabled) { + background: var(--nav-caller-hover); + color: white; +} + +.nav-btn.callee:hover:not(:disabled) { + background: var(--nav-callee-hover); + color: white; +} + +/* Highlighted target line */ +.code-line:target { + animation: highlight-line 2s ease-out; +} + +@keyframes highlight-line { + 0% { + background: rgba(255, 212, 59, 0.6) !important; + outline: 3px solid var(--python-gold); + outline-offset: -3px; + } + 50% { + background: rgba(255, 212, 59, 0.5) !important; + outline: 3px solid var(--python-gold); + outline-offset: -3px; + } + 100% { + background: inherit; + outline: 3px solid transparent; + outline-offset: -3px; + } +} + +/* Popup menu for multiple callees */ +.callee-menu { + position: absolute; + background: var(--bg-primary); + border: 1px solid var(--border); + border-radius: 8px; + box-shadow: var(--shadow-lg); + padding: 8px; + z-index: 1000; + min-width: 250px; + max-width: 400px; + max-height: 300px; + overflow-y: auto; +} + +.callee-menu-header { + font-weight: 600; + color: var(--text-primary); + margin-bottom: 8px; + padding-bottom: 8px; + border-bottom: 1px solid var(--border); + font-size: 13px; + font-family: var(--font-sans); +} + +.callee-menu-item { + padding: 8px; + margin: 4px 0; + border-radius: 6px; + cursor: pointer; + transition: background var(--transition-fast); + display: flex; + flex-direction: column; + gap: 4px; +} + +.callee-menu-item:hover { + background: var(--bg-secondary); +} + +.callee-menu-func { + font-weight: 500; + color: var(--accent); + font-size: 12px; +} + +.callee-menu-file { + font-size: 11px; + color: var(--text-muted); +} + +.count-badge { + display: inline-block; + background: var(--accent); + color: white; + font-size: 10px; + padding: 2px 6px; + border-radius: 4px; + font-weight: 600; + margin-left: 6px; +} + +/* Callee menu scrollbar */ +.callee-menu::-webkit-scrollbar { + width: 6px; +} + +.callee-menu::-webkit-scrollbar-track { + background: var(--bg-secondary); + border-radius: 3px; +} + +.callee-menu::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} + +/* -------------------------------------------------------------------------- + Scroll Minimap Marker + -------------------------------------------------------------------------- */ + +#scroll_marker { + position: fixed; + z-index: 1000; + right: 0; + top: 0; + width: 12px; + height: 100%; + background: var(--bg-secondary); + border-left: 1px solid var(--border); + pointer-events: none; +} + +#scroll_marker .marker { + position: absolute; + min-height: 3px; + width: 100%; + pointer-events: none; +} + +#scroll_marker .marker.cold { + background: var(--heat-2); +} + +#scroll_marker .marker.warm { + background: var(--heat-5); +} + +#scroll_marker .marker.hot { + background: var(--heat-7); +} + +#scroll_marker .marker.vhot { + background: var(--heat-8); +} + +/* -------------------------------------------------------------------------- + Responsive (Heatmap-specific) + -------------------------------------------------------------------------- */ + +@media (max-width: 1100px) { + .stats-summary { + grid-template-columns: repeat(2, 1fr); + } +} + +@media (max-width: 900px) { + .main-content { + padding: 16px; + } +} + +@media (max-width: 600px) { + .stats-summary { + grid-template-columns: 1fr; + } + + .file-stats .stats-grid { + grid-template-columns: repeat(2, 1fr); + } + + .legend-content { + flex-direction: column; + gap: 12px; + } + + .legend-gradient { + width: 100%; + max-width: none; + } +} diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap.js b/Lib/profiling/sampling/_heatmap_assets/heatmap.js new file mode 100644 index 00000000000..ccf82386363 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap.js @@ -0,0 +1,349 @@ +// Tachyon Profiler - Heatmap JavaScript +// Interactive features for the heatmap visualization +// Aligned with Flamegraph viewer design patterns + +// ============================================================================ +// State Management +// ============================================================================ + +let currentMenu = null; +let colorMode = 'self'; // 'self' or 'cumulative' - default to self +let coldCodeHidden = false; + +// ============================================================================ +// Theme Support +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('heatmap-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } + + // Rebuild scroll marker with new theme colors + buildScrollMarker(); +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('heatmap-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? '☼' : '☾'; + } + } +} + +// ============================================================================ +// Utility Functions +// ============================================================================ + +function createElement(tag, className, textContent = '') { + const el = document.createElement(tag); + if (className) el.className = className; + if (textContent) el.textContent = textContent; + return el; +} + +function calculateMenuPosition(buttonRect, menuWidth, menuHeight) { + const viewport = { width: window.innerWidth, height: window.innerHeight }; + const scroll = { + x: window.pageXOffset || document.documentElement.scrollLeft, + y: window.pageYOffset || document.documentElement.scrollTop + }; + + const left = buttonRect.right + menuWidth + 10 < viewport.width + ? buttonRect.right + scroll.x + 10 + : Math.max(scroll.x + 10, buttonRect.left + scroll.x - menuWidth - 10); + + const top = buttonRect.bottom + menuHeight + 10 < viewport.height + ? buttonRect.bottom + scroll.y + 5 + : Math.max(scroll.y + 10, buttonRect.top + scroll.y - menuHeight - 10); + + return { left, top }; +} + +// ============================================================================ +// Menu Management +// ============================================================================ + +function closeMenu() { + if (currentMenu) { + currentMenu.remove(); + currentMenu = null; + } +} + +function showNavigationMenu(button, items, title) { + closeMenu(); + + const menu = createElement('div', 'callee-menu'); + menu.appendChild(createElement('div', 'callee-menu-header', title)); + + items.forEach(linkData => { + const item = createElement('div', 'callee-menu-item'); + + const funcDiv = createElement('div', 'callee-menu-func'); + funcDiv.textContent = linkData.func; + + if (linkData.count !== undefined && linkData.count > 0) { + const countBadge = createElement('span', 'count-badge'); + countBadge.textContent = linkData.count.toLocaleString(); + countBadge.title = `${linkData.count.toLocaleString()} samples`; + funcDiv.appendChild(document.createTextNode(' ')); + funcDiv.appendChild(countBadge); + } + + item.appendChild(funcDiv); + item.appendChild(createElement('div', 'callee-menu-file', linkData.file)); + item.addEventListener('click', () => window.location.href = linkData.link); + menu.appendChild(item); + }); + + const pos = calculateMenuPosition(button.getBoundingClientRect(), 350, 300); + menu.style.left = `${pos.left}px`; + menu.style.top = `${pos.top}px`; + + document.body.appendChild(menu); + currentMenu = menu; +} + +// ============================================================================ +// Navigation +// ============================================================================ + +function handleNavigationClick(button, e) { + e.stopPropagation(); + + const navData = button.getAttribute('data-nav'); + if (navData) { + window.location.href = JSON.parse(navData).link; + return; + } + + const navMulti = button.getAttribute('data-nav-multi'); + if (navMulti) { + const items = JSON.parse(navMulti); + const title = button.classList.contains('caller') ? 'Choose a caller:' : 'Choose a callee:'; + showNavigationMenu(button, items, title); + } +} + +function scrollToTargetLine() { + if (!window.location.hash) return; + const target = document.querySelector(window.location.hash); + if (target) { + target.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } +} + +// ============================================================================ +// Sample Count & Intensity +// ============================================================================ + +function getSampleCount(line) { + let text; + if (colorMode === 'self') { + text = line.querySelector('.line-samples-self')?.textContent.trim().replace(/,/g, ''); + } else { + text = line.querySelector('.line-samples-cumulative')?.textContent.trim().replace(/,/g, ''); + } + return parseInt(text) || 0; +} + +function getIntensityClass(ratio) { + if (ratio > 0.75) return 'vhot'; + if (ratio > 0.5) return 'hot'; + if (ratio > 0.25) return 'warm'; + return 'cold'; +} + +// ============================================================================ +// Scroll Minimap +// ============================================================================ + +function buildScrollMarker() { + const existing = document.getElementById('scroll_marker'); + if (existing) existing.remove(); + + if (document.body.scrollHeight <= window.innerHeight) return; + + const allLines = document.querySelectorAll('.code-line'); + const lines = Array.from(allLines).filter(line => line.style.display !== 'none'); + const markerScale = window.innerHeight / document.body.scrollHeight; + const lineHeight = Math.min(Math.max(3, window.innerHeight / lines.length), 10); + const maxSamples = Math.max(...Array.from(lines, getSampleCount)); + + const scrollMarker = createElement('div', ''); + scrollMarker.id = 'scroll_marker'; + + let prevLine = -99, lastMark, lastTop; + + lines.forEach((line, index) => { + const samples = getSampleCount(line); + if (samples === 0) return; + + const lineTop = Math.floor(line.offsetTop * markerScale); + const lineNumber = index + 1; + const intensityClass = maxSamples > 0 ? getIntensityClass(samples / maxSamples) : 'cold'; + + if (lineNumber === prevLine + 1 && lastMark?.classList.contains(intensityClass)) { + lastMark.style.height = `${lineTop + lineHeight - lastTop}px`; + } else { + lastMark = createElement('div', `marker ${intensityClass}`); + lastMark.style.height = `${lineHeight}px`; + lastMark.style.top = `${lineTop}px`; + scrollMarker.appendChild(lastMark); + lastTop = lineTop; + } + + prevLine = lineNumber; + }); + + document.body.appendChild(scrollMarker); +} + +// ============================================================================ +// Toggle Controls +// ============================================================================ + +function updateToggleUI(toggleId, isOn) { + const toggle = document.getElementById(toggleId); + if (toggle) { + const track = toggle.querySelector('.toggle-track'); + const labels = toggle.querySelectorAll('.toggle-label'); + if (isOn) { + track.classList.add('on'); + labels[0].classList.remove('active'); + labels[1].classList.add('active'); + } else { + track.classList.remove('on'); + labels[0].classList.add('active'); + labels[1].classList.remove('active'); + } + } +} + +function toggleColdCode() { + coldCodeHidden = !coldCodeHidden; + applyHotFilter(); + updateToggleUI('toggle-cold', coldCodeHidden); + buildScrollMarker(); +} + +function applyHotFilter() { + const lines = document.querySelectorAll('.code-line'); + + lines.forEach(line => { + const selfSamples = line.querySelector('.line-samples-self')?.textContent.trim(); + const cumulativeSamples = line.querySelector('.line-samples-cumulative')?.textContent.trim(); + + let isCold; + if (colorMode === 'self') { + isCold = !selfSamples || selfSamples === ''; + } else { + isCold = !cumulativeSamples || cumulativeSamples === ''; + } + + if (isCold) { + line.style.display = coldCodeHidden ? 'none' : 'flex'; + } else { + line.style.display = 'flex'; + } + }); +} + +function toggleColorMode() { + colorMode = colorMode === 'self' ? 'cumulative' : 'self'; + const lines = document.querySelectorAll('.code-line'); + + lines.forEach(line => { + let bgColor; + if (colorMode === 'self') { + bgColor = line.getAttribute('data-self-color'); + } else { + bgColor = line.getAttribute('data-cumulative-color'); + } + + if (bgColor) { + line.style.background = bgColor; + } + }); + + updateToggleUI('toggle-color-mode', colorMode === 'cumulative'); + + if (coldCodeHidden) { + applyHotFilter(); + } + + buildScrollMarker(); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +document.addEventListener('DOMContentLoaded', function() { + // Restore UI state (theme, etc.) + restoreUIState(); + + // Apply background colors + document.querySelectorAll('.code-line[data-bg-color]').forEach(line => { + const bgColor = line.getAttribute('data-bg-color'); + if (bgColor) { + line.style.background = bgColor; + } + }); + + // Initialize navigation buttons + document.querySelectorAll('.nav-btn').forEach(button => { + button.addEventListener('click', e => handleNavigationClick(button, e)); + }); + + // Initialize line number permalink handlers + document.querySelectorAll('.line-number').forEach(lineNum => { + lineNum.style.cursor = 'pointer'; + lineNum.addEventListener('click', e => { + window.location.hash = `line-${e.target.textContent.trim()}`; + }); + }); + + // Initialize toggle buttons + const toggleColdBtn = document.getElementById('toggle-cold'); + if (toggleColdBtn) { + toggleColdBtn.addEventListener('click', toggleColdCode); + } + + const colorModeBtn = document.getElementById('toggle-color-mode'); + if (colorModeBtn) { + colorModeBtn.addEventListener('click', toggleColorMode); + } + + // Build scroll marker + setTimeout(buildScrollMarker, 200); + + // Setup scroll-to-line behavior + setTimeout(scrollToTargetLine, 100); +}); + +// Close menu when clicking outside +document.addEventListener('click', e => { + if (currentMenu && !currentMenu.contains(e.target) && !e.target.classList.contains('nav-btn')) { + closeMenu(); + } +}); + +// Handle hash changes +window.addEventListener('hashchange', () => setTimeout(scrollToTargetLine, 50)); + +// Rebuild scroll marker on resize +window.addEventListener('resize', buildScrollMarker); diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js b/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js new file mode 100644 index 00000000000..5f3e65c3310 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_index.js @@ -0,0 +1,111 @@ +// Tachyon Profiler - Heatmap Index JavaScript +// Index page specific functionality + +// ============================================================================ +// Theme Support +// ============================================================================ + +function toggleTheme() { + const html = document.documentElement; + const current = html.getAttribute('data-theme') || 'light'; + const next = current === 'light' ? 'dark' : 'light'; + html.setAttribute('data-theme', next); + localStorage.setItem('heatmap-theme', next); + + // Update theme button icon + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = next === 'dark' ? '☼' : '☾'; // sun or moon + } +} + +function restoreUIState() { + // Restore theme + const savedTheme = localStorage.getItem('heatmap-theme'); + if (savedTheme) { + document.documentElement.setAttribute('data-theme', savedTheme); + const btn = document.getElementById('theme-btn'); + if (btn) { + btn.innerHTML = savedTheme === 'dark' ? '☼' : '☾'; + } + } +} + +// ============================================================================ +// Type Section Toggle (stdlib, project, etc) +// ============================================================================ + +function toggleTypeSection(header) { + const section = header.parentElement; + const content = section.querySelector('.type-content'); + const icon = header.querySelector('.type-icon'); + + if (content.style.display === 'none') { + content.style.display = 'block'; + icon.textContent = '\u25BC'; + } else { + content.style.display = 'none'; + icon.textContent = '\u25B6'; + } +} + +// ============================================================================ +// Folder Toggle +// ============================================================================ + +function toggleFolder(header) { + const folder = header.parentElement; + const content = folder.querySelector('.folder-content'); + const icon = header.querySelector('.folder-icon'); + + if (content.style.display === 'none') { + content.style.display = 'block'; + icon.textContent = '\u25BC'; + folder.classList.remove('collapsed'); + } else { + content.style.display = 'none'; + icon.textContent = '\u25B6'; + folder.classList.add('collapsed'); + } +} + +// ============================================================================ +// Expand/Collapse All +// ============================================================================ + +function expandAll() { + // Expand all type sections + document.querySelectorAll('.type-section').forEach(section => { + const content = section.querySelector('.type-content'); + const icon = section.querySelector('.type-icon'); + content.style.display = 'block'; + icon.textContent = '\u25BC'; + }); + + // Expand all folders + document.querySelectorAll('.folder-node').forEach(folder => { + const content = folder.querySelector('.folder-content'); + const icon = folder.querySelector('.folder-icon'); + content.style.display = 'block'; + icon.textContent = '\u25BC'; + folder.classList.remove('collapsed'); + }); +} + +function collapseAll() { + document.querySelectorAll('.folder-node').forEach(folder => { + const content = folder.querySelector('.folder-content'); + const icon = folder.querySelector('.folder-icon'); + content.style.display = 'none'; + icon.textContent = '\u25B6'; + folder.classList.add('collapsed'); + }); +} + +// ============================================================================ +// Initialization +// ============================================================================ + +document.addEventListener('DOMContentLoaded', function() { + restoreUIState(); +}); diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html new file mode 100644 index 00000000000..b71bd94c661 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_index_template.html @@ -0,0 +1,118 @@ + + + + + + Tachyon Profiler - Heatmap Report + + + +
+ +
+
+ Tachyon + + Heatmap Report +
+
+ +
+
+ + +
+ +
+
+
📄
+
+ + Files Profiled +
+
+
+
+
📊
+
+ + Total Snapshots +
+
+
+
+
+
+ + Duration +
+
+
+
+
+
+ + Samples/sec +
+
+
+
+
+
+
+ Error Rate +
+ +
+
+
+
+
+
+
+
+
💥
+ Missed Samples +
+ +
+
+
+
+
+
+ + +
+

Profiled Files

+
+ +
+ + +
+ +
+ +
+
+ + +
+ + Tachyon Profiler + + + Python Sampling Profiler + +
+
+ + + + diff --git a/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html new file mode 100644 index 00000000000..d8b26adfb02 --- /dev/null +++ b/Lib/profiling/sampling/_heatmap_assets/heatmap_pyfile_template.html @@ -0,0 +1,96 @@ + + + + + + <!-- FILENAME --> - Heatmap + + + +
+ +
+
+ Tachyon + + +
+
+ Back to Index + +
+
+ + +
+
+
+
+
Self Samples
+
+
+
+
Cumulative
+
+
+
+
Lines Hit
+
+
+
%
+
% of Total
+
+
+
+
Max Self
+
+
+
+
Max Total
+
+
+
+ + +
+
+ Intensity: +
+
+ Cold + + Hot +
+
+ Self Time +
+ Total Time +
+
+ Show All +
+ Hot Only +
+
+
+ + +
+
+
Line
+
Self
+
Total
+
Code
+
+ +
+
+ + + + diff --git a/Lib/profiling/sampling/_shared_assets/base.css b/Lib/profiling/sampling/_shared_assets/base.css new file mode 100644 index 00000000000..20516913496 --- /dev/null +++ b/Lib/profiling/sampling/_shared_assets/base.css @@ -0,0 +1,369 @@ +/* ========================================================================== + Python Profiler - Shared CSS Foundation + Design system shared between Flamegraph and Heatmap viewers + ========================================================================== */ + +/* -------------------------------------------------------------------------- + CSS Variables & Theme System + -------------------------------------------------------------------------- */ + +:root { + /* Typography */ + --font-sans: "Source Sans Pro", "Lucida Grande", "Lucida Sans Unicode", + "Geneva", "Verdana", sans-serif; + --font-mono: 'SF Mono', 'Monaco', 'Consolas', 'Liberation Mono', monospace; + + /* Python brand colors (theme-independent) */ + --python-blue: #3776ab; + --python-blue-light: #4584bb; + --python-blue-lighter: #5592cc; + --python-gold: #ffd43b; + --python-gold-dark: #ffcd02; + --python-gold-light: #ffdc5c; + + /* Heat palette - defined per theme below */ + + /* Layout */ + --sidebar-width: 280px; + --sidebar-collapsed: 44px; + --topbar-height: 56px; + --statusbar-height: 32px; + + /* Transitions */ + --transition-fast: 0.15s ease; + --transition-normal: 0.25s ease; +} + +/* Light theme (default) */ +:root, [data-theme="light"] { + --bg-primary: #ffffff; + --bg-secondary: #f8f9fa; + --bg-tertiary: #e9ecef; + --border: #e9ecef; + --border-subtle: #f0f2f5; + + --text-primary: #2e3338; + --text-secondary: #5a6c7d; + --text-muted: #8b949e; + + --accent: #3776ab; + --accent-hover: #2d5aa0; + --accent-glow: rgba(55, 118, 171, 0.15); + + --shadow-sm: 0 1px 3px rgba(0, 0, 0, 0.08); + --shadow-md: 0 4px 12px rgba(0, 0, 0, 0.1); + --shadow-lg: 0 8px 24px rgba(0, 0, 0, 0.15); + + --header-gradient: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); + + /* Light mode heat palette - blue to yellow to orange to red (cold to hot) */ + --heat-1: #d6e9f8; + --heat-2: #a8d0ef; + --heat-3: #7ba3d1; + --heat-4: #ffe6a8; + --heat-5: #ffd43b; + --heat-6: #ffb84d; + --heat-7: #ff9966; + --heat-8: #ff6347; + + /* Code view specific */ + --code-bg: #ffffff; + --code-bg-line: #f8f9fa; + --code-border: #e9ecef; + --code-text: #2e3338; + --code-text-muted: #8b949e; + --code-accent: #3776ab; + + /* Navigation colors */ + --nav-caller: #2563eb; + --nav-caller-hover: #1d4ed8; + --nav-callee: #dc2626; + --nav-callee-hover: #b91c1c; +} + +/* Dark theme */ +[data-theme="dark"] { + --bg-primary: #0d1117; + --bg-secondary: #161b22; + --bg-tertiary: #21262d; + --border: #30363d; + --border-subtle: #21262d; + + --text-primary: #e6edf3; + --text-secondary: #8b949e; + --text-muted: #6e7681; + + --accent: #58a6ff; + --accent-hover: #79b8ff; + --accent-glow: rgba(88, 166, 255, 0.15); + + --shadow-sm: 0 1px 3px rgba(0, 0, 0, 0.3); + --shadow-md: 0 4px 12px rgba(0, 0, 0, 0.4); + --shadow-lg: 0 8px 24px rgba(0, 0, 0, 0.5); + + --header-gradient: linear-gradient(135deg, #21262d 0%, #30363d 100%); + + /* Dark mode heat palette - dark blue to teal to yellow to orange (cold to hot) */ + --heat-1: #1e3a5f; + --heat-2: #2d5580; + --heat-3: #4a7ba7; + --heat-4: #5a9fa8; + --heat-5: #7ec488; + --heat-6: #c4de6a; + --heat-7: #f4d44d; + --heat-8: #ff6b35; + + /* Code view specific - dark mode */ + --code-bg: #0d1117; + --code-bg-line: #161b22; + --code-border: #30363d; + --code-text: #e6edf3; + --code-text-muted: #6e7681; + --code-accent: #58a6ff; + + /* Navigation colors - dark theme friendly */ + --nav-caller: #58a6ff; + --nav-caller-hover: #4184e4; + --nav-callee: #f87171; + --nav-callee-hover: #e53e3e; +} + +/* -------------------------------------------------------------------------- + Base Styles + -------------------------------------------------------------------------- */ + +*, *::before, *::after { + box-sizing: border-box; +} + +html, body { + margin: 0; + padding: 0; +} + +body { + font-family: var(--font-sans); + font-size: 14px; + line-height: 1.6; + color: var(--text-primary); + background: var(--bg-primary); + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + transition: background var(--transition-normal), color var(--transition-normal); +} + +/* -------------------------------------------------------------------------- + Layout Structure + -------------------------------------------------------------------------- */ + +.app-layout { + display: flex; + flex-direction: column; +} + +/* -------------------------------------------------------------------------- + Top Bar + -------------------------------------------------------------------------- */ + +.top-bar { + height: var(--topbar-height); + background: var(--header-gradient); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + flex-shrink: 0; + box-shadow: 0 2px 10px rgba(55, 118, 171, 0.25); + border-bottom: 2px solid var(--python-gold); +} + +/* Brand / Logo */ +.brand { + display: flex; + align-items: center; + gap: 12px; + color: white; + text-decoration: none; + flex-shrink: 0; +} + +.brand-logo { + display: flex; + align-items: center; + justify-content: center; + width: 28px; + height: 28px; + flex-shrink: 0; +} + +/* Style the inlined SVG/img inside brand-logo */ +.brand-logo svg, +.brand-logo img { + width: 28px; + height: 28px; + display: block; + object-fit: contain; + filter: drop-shadow(0 1px 2px rgba(0, 0, 0, 0.2)); +} + +.brand-info { + display: flex; + flex-direction: column; + line-height: 1.15; +} + +.brand-text { + font-weight: 700; + font-size: 16px; + letter-spacing: -0.3px; + text-shadow: 0 1px 2px rgba(0, 0, 0, 0.15); +} + +.brand-subtitle { + font-weight: 500; + font-size: 10px; + opacity: 0.9; + text-transform: uppercase; + letter-spacing: 0.5px; +} + +.brand-divider { + width: 1px; + height: 16px; + background: rgba(255, 255, 255, 0.3); +} + +/* Toolbar */ +.toolbar { + display: flex; + align-items: center; + gap: 6px; + margin-left: auto; +} + +.toolbar-btn { + display: flex; + align-items: center; + justify-content: center; + width: 32px; + height: 32px; + padding: 0; + font-size: 15px; + color: white; + background: rgba(255, 255, 255, 0.12); + border: 1px solid rgba(255, 255, 255, 0.18); + border-radius: 6px; + cursor: pointer; + transition: all var(--transition-fast); +} + +.toolbar-btn:hover { + background: rgba(255, 255, 255, 0.22); + border-color: rgba(255, 255, 255, 0.35); +} + +.toolbar-btn:active { + transform: scale(0.95); +} + +/* -------------------------------------------------------------------------- + Status Bar + -------------------------------------------------------------------------- */ + +.status-bar { + height: var(--statusbar-height); + background: var(--bg-secondary); + border-top: 1px solid var(--border); + display: flex; + align-items: center; + padding: 0 16px; + gap: 16px; + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-secondary); + flex-shrink: 0; +} + +.status-item { + display: flex; + align-items: center; + gap: 5px; +} + +.status-item::before { + content: ''; + width: 4px; + height: 4px; + background: var(--python-gold); + border-radius: 50%; +} + +.status-item:first-child::before { + display: none; +} + +.status-label { + color: var(--text-muted); +} + +.status-value { + color: var(--text-primary); + font-weight: 500; +} + +.status-value.accent { + color: var(--accent); + font-weight: 600; +} + +/* -------------------------------------------------------------------------- + Animations + -------------------------------------------------------------------------- */ + +@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } +} + +@keyframes slideUp { + from { + opacity: 0; + transform: translateY(12px); + } + to { + opacity: 1; + transform: translateY(0); + } +} + +@keyframes shimmer { + 0% { left: -100%; } + 100% { left: 100%; } +} + +/* -------------------------------------------------------------------------- + Focus States (Accessibility) + -------------------------------------------------------------------------- */ + +button:focus-visible, +select:focus-visible, +input:focus-visible { + outline: 2px solid var(--python-gold); + outline-offset: 2px; +} + +/* -------------------------------------------------------------------------- + Shared Responsive + -------------------------------------------------------------------------- */ + +@media (max-width: 900px) { + .brand-subtitle { + display: none; + } +} + +@media (max-width: 600px) { + .toolbar-btn:not(.theme-toggle) { + display: none; + } +} diff --git a/Lib/profiling/sampling/_sync_coordinator.py b/Lib/profiling/sampling/_sync_coordinator.py index 8716e654104..be63dbe3e90 100644 --- a/Lib/profiling/sampling/_sync_coordinator.py +++ b/Lib/profiling/sampling/_sync_coordinator.py @@ -10,6 +10,7 @@ import socket import runpy import time +import types from typing import List, NoReturn @@ -175,15 +176,21 @@ def _execute_script(script_path: str, script_args: List[str], cwd: str) -> None: try: with open(script_path, 'rb') as f: source_code = f.read() + except FileNotFoundError as e: raise TargetError(f"Script file not found: {script_path}") from e except PermissionError as e: raise TargetError(f"Permission denied reading script: {script_path}") from e try: - # Compile and execute the script - code = compile(source_code, script_path, 'exec') - exec(code, {'__name__': '__main__', '__file__': script_path}) + main_module = types.ModuleType("__main__") + main_module.__file__ = script_path + main_module.__builtins__ = __builtins__ + # gh-140729: Create a __mp_main__ module to allow pickling + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + + code = compile(source_code, script_path, 'exec', module='__main__') + exec(code, main_module.__dict__) except SyntaxError as e: raise TargetError(f"Syntax error in script {script_path}: {e}") from e except SystemExit: diff --git a/Lib/profiling/sampling/cli.py b/Lib/profiling/sampling/cli.py new file mode 100644 index 00000000000..5c0e39d7737 --- /dev/null +++ b/Lib/profiling/sampling/cli.py @@ -0,0 +1,718 @@ +"""Command-line interface for the sampling profiler.""" + +import argparse +import os +import socket +import subprocess +import sys + +from .sample import sample, sample_live +from .pstats_collector import PstatsCollector +from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .heatmap_collector import HeatmapCollector +from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_ALL, + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + SORT_MODE_NSAMPLES, + SORT_MODE_TOTTIME, + SORT_MODE_CUMTIME, + SORT_MODE_SAMPLE_PCT, + SORT_MODE_CUMUL_PCT, + SORT_MODE_NSAMPLES_CUMUL, +) + +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None + + +class CustomFormatter( + argparse.ArgumentDefaultsHelpFormatter, + argparse.RawDescriptionHelpFormatter, +): + """Custom formatter that combines default values display with raw description formatting.""" + pass + + +_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. + +Commands: + run Run and profile a script or module + attach Attach to and profile a running process + +Examples: + # Run and profile a script + python -m profiling.sampling run script.py arg1 arg2 + + # Attach to a running process + python -m profiling.sampling attach 1234 + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234 + +Use 'python -m profiling.sampling --help' for command-specific help.""" + + +# Constants for socket synchronization +_SYNC_TIMEOUT = 5.0 +_PROCESS_KILL_TIMEOUT = 2.0 +_READY_MESSAGE = b"ready" +_RECV_BUFFER_SIZE = 1024 + +# Format configuration +FORMAT_EXTENSIONS = { + "pstats": "pstats", + "collapsed": "txt", + "flamegraph": "html", + "gecko": "json", + "heatmap": "html", +} + +COLLECTOR_MAP = { + "pstats": PstatsCollector, + "collapsed": CollapsedStackCollector, + "flamegraph": FlamegraphCollector, + "gecko": GeckoCollector, + "heatmap": HeatmapCollector, +} + + +def _parse_mode(mode_string): + """Convert mode string to mode constant.""" + mode_map = { + "wall": PROFILING_MODE_WALL, + "cpu": PROFILING_MODE_CPU, + "gil": PROFILING_MODE_GIL, + } + return mode_map[mode_string] + + +def _run_with_sync(original_cmd, suppress_output=False): + """Run a command with socket-based synchronization and return the process.""" + # Create a TCP socket for synchronization with better socket options + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: + # Set SO_REUSEADDR to avoid "Address already in use" errors + sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port + sync_port = sync_sock.getsockname()[1] + sync_sock.listen(1) + sync_sock.settimeout(_SYNC_TIMEOUT) + + # Get current working directory to preserve it + cwd = os.getcwd() + + # Build command using the sync coordinator + target_args = original_cmd[1:] # Remove python executable + cmd = ( + sys.executable, + "-m", + "profiling.sampling._sync_coordinator", + str(sync_port), + cwd, + ) + tuple(target_args) + + # Start the process with coordinator + # Suppress stdout/stderr if requested (for live mode) + popen_kwargs = {} + if suppress_output: + popen_kwargs["stdin"] = subprocess.DEVNULL + popen_kwargs["stdout"] = subprocess.DEVNULL + popen_kwargs["stderr"] = subprocess.DEVNULL + + process = subprocess.Popen(cmd, **popen_kwargs) + + try: + # Wait for ready signal with timeout + with sync_sock.accept()[0] as conn: + ready_signal = conn.recv(_RECV_BUFFER_SIZE) + + if ready_signal != _READY_MESSAGE: + raise RuntimeError( + f"Invalid ready signal received: {ready_signal!r}" + ) + + except socket.timeout: + # If we timeout, kill the process and raise an error + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + raise RuntimeError( + "Process failed to signal readiness within timeout" + ) + + return process + + +def _add_sampling_options(parser): + """Add sampling configuration options to a parser.""" + sampling_group = parser.add_argument_group("Sampling configuration") + sampling_group.add_argument( + "-i", + "--interval", + type=int, + default=100, + metavar="MICROSECONDS", + help="sampling interval", + ) + sampling_group.add_argument( + "-d", + "--duration", + type=int, + default=10, + metavar="SECONDS", + help="Sampling duration", + ) + sampling_group.add_argument( + "-a", + "--all-threads", + action="store_true", + help="Sample all threads in the process instead of just the main thread", + ) + sampling_group.add_argument( + "--realtime-stats", + action="store_true", + help="Print real-time sampling statistics (Hz, mean, min, max) during profiling", + ) + sampling_group.add_argument( + "--native", + action="store_true", + help='Include artificial "" frames to denote calls to non-Python code', + ) + sampling_group.add_argument( + "--no-gc", + action="store_false", + dest="gc", + help='Don\'t include artificial "" frames to denote active garbage collection', + ) + + +def _add_mode_options(parser): + """Add mode options to a parser.""" + mode_group = parser.add_argument_group("Mode options") + mode_group.add_argument( + "--mode", + choices=["wall", "cpu", "gil"], + default="wall", + help="Sampling mode: wall (all samples), cpu (only samples when thread is on CPU), " + "gil (only samples when thread holds the GIL)", + ) + + +def _add_format_options(parser): + """Add output format options to a parser.""" + output_group = parser.add_argument_group("Output options") + format_group = output_group.add_mutually_exclusive_group() + format_group.add_argument( + "--pstats", + action="store_const", + const="pstats", + dest="format", + help="Generate pstats output (default)", + ) + format_group.add_argument( + "--collapsed", + action="store_const", + const="collapsed", + dest="format", + help="Generate collapsed stack traces for flamegraphs", + ) + format_group.add_argument( + "--flamegraph", + action="store_const", + const="flamegraph", + dest="format", + help="Generate interactive HTML flamegraph visualization", + ) + format_group.add_argument( + "--gecko", + action="store_const", + const="gecko", + dest="format", + help="Generate Gecko format for Firefox Profiler", + ) + format_group.add_argument( + "--heatmap", + action="store_const", + const="heatmap", + dest="format", + help="Generate interactive HTML heatmap visualization with line-level sample counts", + ) + parser.set_defaults(format="pstats") + + output_group.add_argument( + "-o", + "--output", + dest="outfile", + help="Output path (default: stdout for pstats, auto-generated for others). " + "For heatmap: directory name (default: heatmap_PID)", + ) + + +def _add_pstats_options(parser): + """Add pstats-specific display options to a parser.""" + pstats_group = parser.add_argument_group("pstats format options") + pstats_group.add_argument( + "--sort", + choices=[ + "nsamples", + "tottime", + "cumtime", + "sample-pct", + "cumul-pct", + "nsamples-cumul", + "name", + ], + default=None, + help="Sort order for pstats output (default: nsamples)", + ) + pstats_group.add_argument( + "-l", + "--limit", + type=int, + default=None, + help="Limit the number of rows in the output (default: 15)", + ) + pstats_group.add_argument( + "--no-summary", + action="store_true", + help="Disable the summary section in the pstats output", + ) + + +def _sort_to_mode(sort_choice): + """Convert sort choice string to SORT_MODE constant.""" + sort_map = { + "nsamples": SORT_MODE_NSAMPLES, + "tottime": SORT_MODE_TOTTIME, + "cumtime": SORT_MODE_CUMTIME, + "sample-pct": SORT_MODE_SAMPLE_PCT, + "cumul-pct": SORT_MODE_CUMUL_PCT, + "nsamples-cumul": SORT_MODE_NSAMPLES_CUMUL, + "name": -1, + } + return sort_map.get(sort_choice, SORT_MODE_NSAMPLES) + + +def _create_collector(format_type, interval, skip_idle): + """Create the appropriate collector based on format type. + + Args: + format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko') + interval: Sampling interval in microseconds + skip_idle: Whether to skip idle samples + + Returns: + A collector instance of the appropriate type + """ + collector_class = COLLECTOR_MAP.get(format_type) + if collector_class is None: + raise ValueError(f"Unknown format: {format_type}") + + # Gecko format never skips idle (it needs both GIL and CPU data) + if format_type == "gecko": + skip_idle = False + + return collector_class(interval, skip_idle=skip_idle) + + +def _generate_output_filename(format_type, pid): + """Generate output filename based on format and PID. + + Args: + format_type: The output format + pid: Process ID + + Returns: + Generated filename + """ + extension = FORMAT_EXTENSIONS.get(format_type, "txt") + # For heatmap, use cleaner directory name without extension + if format_type == "heatmap": + return f"heatmap_{pid}" + return f"{format_type}.{pid}.{extension}" + + +def _handle_output(collector, args, pid, mode): + """Handle output for the collector based on format and arguments. + + Args: + collector: The collector instance with profiling data + args: Parsed command-line arguments + pid: Process ID (for generating filenames) + mode: Profiling mode used + """ + if args.format == "pstats": + if args.outfile: + collector.export(args.outfile) + else: + # Print to stdout with defaults applied + sort_choice = args.sort if args.sort is not None else "nsamples" + limit = args.limit if args.limit is not None else 15 + sort_mode = _sort_to_mode(sort_choice) + collector.print_stats( + sort_mode, limit, not args.no_summary, mode + ) + else: + # Export to file + filename = args.outfile or _generate_output_filename(args.format, pid) + collector.export(filename) + + +def _validate_args(args, parser): + """Validate format-specific options and live mode requirements. + + Args: + args: Parsed command-line arguments + parser: ArgumentParser instance for error reporting + """ + # Check if live mode is available + if hasattr(args, 'live') and args.live and LiveStatsCollector is None: + parser.error( + "Live mode requires the curses module, which is not available." + ) + + # Live mode is incompatible with format options + if hasattr(args, 'live') and args.live: + if args.format != "pstats": + format_flag = f"--{args.format}" + parser.error( + f"--live is incompatible with {format_flag}. Live mode uses a TUI interface." + ) + + # Live mode is also incompatible with pstats-specific options + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + parser.error( + f"Options {', '.join(issues)} are incompatible with --live. " + "Live mode uses a TUI interface with its own controls." + ) + return + + # Validate gecko mode doesn't use non-wall mode + if args.format == "gecko" and args.mode != "wall": + parser.error( + "--mode option is incompatible with --gecko. " + "Gecko format automatically includes both GIL-holding and CPU status analysis." + ) + + # Validate pstats-specific options are only used with pstats format + if args.format != "pstats": + issues = [] + if args.sort is not None: + issues.append("--sort") + if args.limit is not None: + issues.append("--limit") + if args.no_summary: + issues.append("--no-summary") + + if issues: + format_flag = f"--{args.format}" + parser.error( + f"Options {', '.join(issues)} are only valid with --pstats, not {format_flag}" + ) + + +def main(): + """Main entry point for the CLI.""" + # Create the main parser + parser = argparse.ArgumentParser( + description=_HELP_DESCRIPTION, + formatter_class=CustomFormatter, + ) + + # Create subparsers for commands + subparsers = parser.add_subparsers( + dest="command", required=True, help="Command to run" + ) + + # === RUN COMMAND === + run_parser = subparsers.add_parser( + "run", + help="Run and profile a script or module", + formatter_class=CustomFormatter, + description="""Run and profile a Python script or module + +Examples: + # Run and profile a module + python -m profiling.sampling run -m mymodule arg1 arg2 + + # Generate flamegraph from a script + python -m profiling.sampling run --flamegraph -o output.html script.py + + # Profile with custom interval and duration + python -m profiling.sampling run -i 50 -d 30 script.py + + # Save collapsed stacks to file + python -m profiling.sampling run --collapsed -o stacks.txt script.py + + # Live interactive mode for a script + python -m profiling.sampling run --live script.py""", + ) + run_parser.add_argument( + "-m", + "--module", + action="store_true", + help="Run target as a module (like python -m)", + ) + run_parser.add_argument( + "target", + help="Script file or module name to profile", + ) + run_parser.add_argument( + "args", + nargs=argparse.REMAINDER, + help="Arguments to pass to the script or module", + ) + run_parser.add_argument( + "--live", + action="store_true", + help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(run_parser) + _add_mode_options(run_parser) + _add_format_options(run_parser) + _add_pstats_options(run_parser) + + # === ATTACH COMMAND === + attach_parser = subparsers.add_parser( + "attach", + help="Attach to and profile a running process", + formatter_class=CustomFormatter, + description="""Attach to a running process and profile it + +Examples: + # Profile all threads, sort by total time + python -m profiling.sampling attach -a --sort tottime 1234 + + # Live interactive mode for a running process + python -m profiling.sampling attach --live 1234""", + ) + attach_parser.add_argument( + "pid", + type=int, + help="Process ID to attach to", + ) + attach_parser.add_argument( + "--live", + action="store_true", + help="Interactive TUI profiler (top-like interface, press 'q' to quit, 's' to cycle sort)", + ) + _add_sampling_options(attach_parser) + _add_mode_options(attach_parser) + _add_format_options(attach_parser) + _add_pstats_options(attach_parser) + + # Parse arguments + args = parser.parse_args() + + # Validate arguments + _validate_args(args, parser) + + # Command dispatch table + command_handlers = { + "run": _handle_run, + "attach": _handle_attach, + } + + # Execute the appropriate command + handler = command_handlers.get(args.command) + if handler: + handler(args) + else: + parser.error(f"Unknown command: {args.command}") + + +def _handle_attach(args): + """Handle the 'attach' command.""" + # Check if live mode is requested + if args.live: + _handle_live_attach(args, args.pid) + return + + # Use PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, skip_idle) + + # Sample the process + collector = sample( + args.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, args.pid, mode) + + +def _handle_run(args): + """Handle the 'run' command.""" + # Check if live mode is requested + if args.live: + _handle_live_run(args) + return + + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization + process = _run_with_sync(cmd, suppress_output=False) + + # Use PROFILING_MODE_ALL for gecko format + mode = ( + PROFILING_MODE_ALL + if args.format == "gecko" + else _parse_mode(args.mode) + ) + + # Determine skip_idle based on mode + skip_idle = ( + mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False + ) + + # Create the appropriate collector + collector = _create_collector(args.format, args.interval, skip_idle) + + # Profile the subprocess + try: + collector = sample( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + # Handle output + _handle_output(collector, args, process.pid, mode) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +def _handle_live_attach(args, pid): + """Handle live mode for an existing process.""" + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # Default limit + pid=pid, + mode=mode, + ) + + # Sample in live mode + sample_live( + pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + + +def _handle_live_run(args): + """Handle live mode for running a script/module.""" + # Build the command to run + if args.module: + cmd = (sys.executable, "-m", args.target, *args.args) + else: + cmd = (sys.executable, args.target, *args.args) + + # Run with synchronization, suppressing output for live mode + process = _run_with_sync(cmd, suppress_output=True) + + mode = _parse_mode(args.mode) + + # Determine skip_idle based on mode + skip_idle = mode != PROFILING_MODE_WALL + + # Create live collector with default settings + collector = LiveStatsCollector( + args.interval, + skip_idle=skip_idle, + sort_by="tottime", # Default initial sort + limit=20, # Default limit + pid=process.pid, + mode=mode, + ) + + # Profile the subprocess in live mode + try: + sample_live( + process.pid, + collector, + duration_sec=args.duration, + all_threads=args.all_threads, + realtime_stats=args.realtime_stats, + mode=mode, + native=args.native, + gc=args.gc, + ) + finally: + # Clean up the subprocess + if process.poll() is None: + process.terminate() + try: + process.wait(timeout=_PROCESS_KILL_TIMEOUT) + except subprocess.TimeoutExpired: + process.kill() + process.wait() + + +if __name__ == "__main__": + main() diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py index b7a033ac0a6..6187f351cb5 100644 --- a/Lib/profiling/sampling/collector.py +++ b/Lib/profiling/sampling/collector.py @@ -1,33 +1,114 @@ from abc import ABC, abstractmethod - -# Enums are slow -THREAD_STATE_RUNNING = 0 -THREAD_STATE_IDLE = 1 -THREAD_STATE_GIL_WAIT = 2 -THREAD_STATE_UNKNOWN = 3 - -STATUS = { - THREAD_STATE_RUNNING: "running", - THREAD_STATE_IDLE: "idle", - THREAD_STATE_GIL_WAIT: "gil_wait", - THREAD_STATE_UNKNOWN: "unknown", -} +from .constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, +) class Collector(ABC): @abstractmethod def collect(self, stack_frames): """Collect profiling data from stack frames.""" + def collect_failed_sample(self): + """Collect data about a failed sample attempt.""" + @abstractmethod def export(self, filename): """Export collected data to a file.""" def _iter_all_frames(self, stack_frames, skip_idle=False): - """Iterate over all frame stacks from all interpreters and threads.""" for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: - if skip_idle and thread_info.status != THREAD_STATE_RUNNING: - continue + # skip_idle now means: skip if thread is not actively running + # A thread is "active" if it has the GIL OR is on CPU + if skip_idle: + status_flags = thread_info.status + has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) + on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) + if not (has_gil or on_cpu): + continue frames = thread_info.frame_info if frames: yield frames, thread_info.thread_id + + def _is_gc_frame(self, frame): + if isinstance(frame, tuple): + funcname = frame[2] if len(frame) >= 3 else "" + else: + funcname = getattr(frame, "funcname", "") + + return "" in funcname or "gc_collect" in funcname + + def _collect_thread_status_stats(self, stack_frames): + """Collect aggregate and per-thread status statistics from a sample. + + Returns: + tuple: (aggregate_status_counts, has_gc_frame, per_thread_stats) + - aggregate_status_counts: dict with has_gil, on_cpu, etc. + - has_gc_frame: bool indicating if any thread has GC frames + - per_thread_stats: dict mapping thread_id to per-thread counts + """ + status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + has_gc_frame = False + per_thread_stats = {} + + for interpreter_info in stack_frames: + threads = getattr(interpreter_info, "threads", []) + for thread_info in threads: + status_counts["total"] += 1 + + # Track thread status using bit flags + status_flags = getattr(thread_info, "status", 0) + + if status_flags & THREAD_STATUS_HAS_GIL: + status_counts["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + status_counts["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + status_counts["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + status_counts["unknown"] += 1 + + # Track per-thread statistics + thread_id = getattr(thread_info, "thread_id", None) + if thread_id is not None: + if thread_id not in per_thread_stats: + per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + + thread_stats = per_thread_stats[thread_id] + thread_stats["total"] += 1 + + if status_flags & THREAD_STATUS_HAS_GIL: + thread_stats["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + thread_stats["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + thread_stats["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + thread_stats["unknown"] += 1 + + # Check for GC frames in this thread + frames = getattr(thread_info, "frame_info", None) + if frames: + for frame in frames: + if self._is_gc_frame(frame): + thread_stats["gc_samples"] += 1 + has_gc_frame = True + break + + return status_counts, has_gc_frame, per_thread_stats diff --git a/Lib/profiling/sampling/constants.py b/Lib/profiling/sampling/constants.py new file mode 100644 index 00000000000..be2ae60a88f --- /dev/null +++ b/Lib/profiling/sampling/constants.py @@ -0,0 +1,30 @@ +"""Constants for the sampling profiler.""" + +# Profiling mode constants +PROFILING_MODE_WALL = 0 +PROFILING_MODE_CPU = 1 +PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 # Combines GIL + CPU checks + +# Sort mode constants +SORT_MODE_NSAMPLES = 0 +SORT_MODE_TOTTIME = 1 +SORT_MODE_CUMTIME = 2 +SORT_MODE_SAMPLE_PCT = 3 +SORT_MODE_CUMUL_PCT = 4 +SORT_MODE_NSAMPLES_CUMUL = 5 + +# Thread status flags +try: + from _remote_debugging import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + ) +except ImportError: + # Fallback for tests or when module is not available + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) diff --git a/Lib/profiling/sampling/flamegraph.css b/Lib/profiling/sampling/flamegraph.css deleted file mode 100644 index 67754ca609a..00000000000 --- a/Lib/profiling/sampling/flamegraph.css +++ /dev/null @@ -1,488 +0,0 @@ -body { - font-family: - "Source Sans Pro", "Lucida Grande", "Lucida Sans Unicode", "Geneva", - "Verdana", sans-serif; - margin: 0; - padding: 0; - background: #ffffff; - color: #2e3338; - line-height: 1.6; -} - -.header { - background: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); - color: white; - padding: 32px 0; - box-shadow: 0 2px 10px rgba(55, 118, 171, 0.2); -} - -.header-content { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - text-align: center; - gap: 20px; -} - -.python-logo { - width: auto; - height: 72px; - margin-bottom: 12px; /* tighter spacing to avoid visual gap */ - flex-shrink: 0; - display: flex; - align-items: center; - justify-content: center; -} - -.python-logo img { - height: 72px; - width: auto; - display: block; /* avoid baseline alignment issues */ - vertical-align: middle; - /* subtle shadow that does not affect layout */ - filter: drop-shadow(0 2px 6px rgba(0, 0, 0, 0.1)); -} - -.header-text h1 { - margin: 0; - font-size: 2.5em; - font-weight: 600; - color: white; - text-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); -} - -.header-text .subtitle { - margin: 8px 0 0 0; - font-size: 1.1em; - color: rgba(255, 255, 255, 0.9); - font-weight: 300; -} - -.header-search { - width: 100%; - max-width: 500px; -} - -.header-search #search-input { - width: 100%; - padding: 12px 20px; - border: 2px solid rgba(255, 255, 255, 0.2); - border-radius: 25px; - font-size: 16px; - font-family: inherit; - background: rgba(255, 255, 255, 0.95); - color: #2e3338; - transition: all 0.3s ease; - box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1); - backdrop-filter: blur(10px); -} - -.header-search #search-input:focus { - outline: none; - border-color: rgba(255, 255, 255, 0.8); - background: rgba(255, 255, 255, 1); - box-shadow: 0 6px 20px rgba(0, 0, 0, 0.15); - transform: translateY(-2px); -} - -.header-search #search-input::placeholder { - color: #6c757d; -} - -.stats-section { - background: #ffffff; - padding: 24px 0; - border-bottom: 1px solid #e9ecef; -} - -.stats-container { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - display: grid; - grid-template-columns: repeat(3, 1fr); - gap: 20px; -} - -.stat-card { - background: #ffffff; - border: 1px solid #e9ecef; - border-radius: 12px; - padding: 20px; - display: flex; - align-items: flex-start; - gap: 16px; - box-shadow: 0 2px 8px rgba(0, 0, 0, 0.06); - transition: all 0.2s ease; - min-height: 120px; -} - -.stat-card:hover { - box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1); - transform: translateY(-2px); -} - -.stat-icon { - font-size: 32px; - width: 56px; - height: 56px; - display: flex; - align-items: center; - justify-content: center; - background: linear-gradient(135deg, #3776ab 0%, #4584bb 100%); - border-radius: 50%; - flex-shrink: 0; - box-shadow: 0 2px 8px rgba(55, 118, 171, 0.3); -} - -.stat-content { - flex: 1; -} - -.stat-label { - font-size: 14px; - color: #5a6c7d; - font-weight: 500; - margin-bottom: 4px; - text-transform: uppercase; - letter-spacing: 0.5px; -} - -.stat-value { - font-size: 16px; - font-weight: 700; - color: #2e3338; - line-height: 1.3; - margin-bottom: 4px; - word-break: break-word; - overflow-wrap: break-word; -} - -.stat-file { - font-size: 12px; - color: #8b949e; - font-weight: 400; - margin-bottom: 2px; - font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; - word-break: break-word; - overflow-wrap: break-word; -} - -.stat-detail { - font-size: 12px; - color: #5a6c7d; - font-weight: 400; - line-height: 1.4; - font-family: 'SF Mono', 'Monaco', 'Consolas', monospace; - word-break: break-word; - overflow-wrap: break-word; -} - -.controls { - background: #f8f9fa; - border-bottom: 1px solid #e9ecef; - padding: 20px 0; - text-align: center; -} - -.controls-content { - max-width: 1200px; - margin: 0 auto; - padding: 0 24px; - text-align: center; -} - - -.controls button { - background: #3776ab; - color: white; - border: none; - padding: 12px 24px; - margin: 0 8px; - border-radius: 6px; - cursor: pointer; - font-weight: 600; - font-size: 14px; - font-family: inherit; - transition: all 0.2s ease; - box-shadow: 0 2px 4px rgba(55, 118, 171, 0.2); -} - -.controls button:hover { - background: #2d5aa0; - transform: translateY(-1px); - box-shadow: 0 4px 8px rgba(55, 118, 171, 0.3); -} - -.controls button.secondary { - background: #ffd43b; - color: #2e3338; -} - -.controls button.secondary:hover { - background: #ffcd02; -} - -.thread-filter-wrapper { - display: none; - align-items: center; - margin-left: 16px; - background: white; - border-radius: 6px; - padding: 4px 8px 4px 12px; - border: 2px solid #3776ab; - transition: all 0.2s ease; -} - -.thread-filter-wrapper:hover { - border-color: #2d5aa0; - box-shadow: 0 2px 6px rgba(55, 118, 171, 0.2); -} - -.thread-filter-label { - color: #3776ab; - font-size: 14px; - font-weight: 600; - margin-right: 8px; - display: flex; - align-items: center; -} - -.thread-filter-select { - background: transparent; - color: #2e3338; - border: none; - padding: 4px 24px 4px 4px; - font-size: 14px; - font-weight: 600; - cursor: pointer; - min-width: 120px; - font-family: inherit; - appearance: none; - -webkit-appearance: none; - -moz-appearance: none; - background-image: url("data:image/svg+xml;charset=UTF-8,%3csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none' stroke='%233776ab' stroke-width='2' stroke-linecap='round' stroke-linejoin='round'%3e%3cpolyline points='6 9 12 15 18 9'%3e%3c/polyline%3e%3c/svg%3e"); - background-repeat: no-repeat; - background-position: right 4px center; - background-size: 16px; -} - -.thread-filter-select:focus { - outline: none; -} - -.thread-filter-select:hover { - color: #3776ab; -} - -.thread-filter-select option { - padding: 8px; - background: white; - color: #2e3338; - font-weight: normal; -} - -#chart { - width: 100%; - height: calc(100vh - 160px); - overflow: hidden; - background: #ffffff; - padding: 0 40px; -} - -.d3-flame-graph rect { - /* Prefer selector specificity instead of !important */ - stroke: rgba(55, 118, 171, 0.3); - stroke-width: 1px; - cursor: pointer; - transition: all 0.1s ease; -} - -.d3-flame-graph rect:hover { - stroke: #3776ab; - stroke-width: 2px; - filter: brightness(1.05); -} - -.d3-flame-graph text { - /* Ensure labels use our font without !important */ - font-family: "Source Sans Pro", sans-serif; - font-size: 12px; - font-weight: 500; - fill: #2e3338; - pointer-events: none; -} - -.info-panel { - position: fixed; - bottom: 24px; - left: 84px; /* Leave space for the button */ - background: white; - padding: 24px; - border-radius: 8px; - border: 1px solid #e9ecef; - font-size: 14px; - max-width: 280px; - box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); - z-index: 1000; - display: none; -} - -.info-panel h3 { - margin: 0 0 16px 0; - color: #3776ab; - font-weight: 600; - font-size: 16px; - border-bottom: 2px solid #ffd43b; - padding-bottom: 8px; -} - -.info-panel p { - margin: 12px 0; - color: #5a6c7d; - line-height: 1.5; -} - -.info-panel strong { - color: #3776ab; -} - -#show-info-btn { - position: fixed; - bottom: 32px; - left: 32px; - z-index: 1100; - width: 44px; - height: 44px; - border-radius: 50%; - background: #3776ab; - color: white; - border: none; - font-size: 24px; - box-shadow: 0 2px 8px rgba(55, 118, 171, 0.15); - cursor: pointer; - display: flex; - align-items: center; - justify-content: center; - transition: background 0.2s; -} - -#show-info-btn:hover { - background: #2d5aa0; -} - -#close-info-btn { - position: absolute; - top: 8px; - right: 12px; - background: none; - border: none; - font-size: 20px; - cursor: pointer; - color: #3776ab; -} - -@media (max-width: 600px) { - .python-logo { height: 48px; } - .python-logo img { height: 48px; } - #show-info-btn { - left: 8px; - bottom: 8px; - } - .info-panel { - left: 60px; /* Still leave space for button */ - bottom: 8px; - max-width: 90vw; - } -} - -.legend-panel { - position: fixed; - top: 24px; - left: 24px; - background: white; - padding: 24px; - border-radius: 8px; - border: 1px solid #e9ecef; - font-size: 14px; - max-width: 320px; - box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1); - display: none; - z-index: 1001; -} - -.legend-panel h3 { - margin: 0 0 20px 0; - color: #3776ab; - font-weight: 600; - font-size: 18px; - text-align: center; - border-bottom: 2px solid #ffd43b; - padding-bottom: 8px; -} - -.legend-item { - display: flex; - align-items: center; - margin: 12px 0; - padding: 10px; - border-radius: 6px; - background: #f8f9fa; - border: 1px solid #e9ecef; -} - -.legend-color { - width: 28px; - height: 18px; - border-radius: 4px; - margin-right: 16px; - border: 1px solid rgba(0, 0, 0, 0.1); - flex-shrink: 0; -} - -.legend-label { - color: #2e3338; - font-weight: 600; - flex: 1; -} - -.legend-description { - color: #5a6c7d; - font-size: 12px; - margin-top: 2px; - font-weight: 400; -} - -.chart-container { - background: #ffffff; - margin: 0; - padding: 12px 0; -} - -/* Tooltip overflow fixes */ -.python-tooltip { - max-width: 500px !important; - word-wrap: break-word !important; - overflow-wrap: break-word !important; - box-sizing: border-box !important; -} - -/* Responsive tooltip adjustments */ -@media (max-width: 768px) { - .python-tooltip { - max-width: calc(100vw - 40px) !important; - max-height: calc(100vh - 80px) !important; - overflow-y: auto !important; - } -} - -@media (max-width: 480px) { - .python-tooltip { - max-width: calc(100vw - 20px) !important; - font-size: 12px !important; - } -} diff --git a/Lib/profiling/sampling/flamegraph.js b/Lib/profiling/sampling/flamegraph.js deleted file mode 100644 index 95ad7ca6184..00000000000 --- a/Lib/profiling/sampling/flamegraph.js +++ /dev/null @@ -1,632 +0,0 @@ -const EMBEDDED_DATA = {{FLAMEGRAPH_DATA}}; - -// Global string table for resolving string indices -let stringTable = []; -let originalData = null; -let currentThreadFilter = 'all'; - -// Function to resolve string indices to actual strings -function resolveString(index) { - if (typeof index === 'number' && index >= 0 && index < stringTable.length) { - return stringTable[index]; - } - // Fallback for non-indexed strings or invalid indices - return String(index); -} - -// Function to recursively resolve all string indices in flamegraph data -function resolveStringIndices(node) { - if (!node) return node; - - // Create a copy to avoid mutating the original - const resolved = { ...node }; - - // Resolve string fields - if (typeof resolved.name === 'number') { - resolved.name = resolveString(resolved.name); - } - if (typeof resolved.filename === 'number') { - resolved.filename = resolveString(resolved.filename); - } - if (typeof resolved.funcname === 'number') { - resolved.funcname = resolveString(resolved.funcname); - } - - // Resolve source lines if present - if (Array.isArray(resolved.source)) { - resolved.source = resolved.source.map(index => - typeof index === 'number' ? resolveString(index) : index - ); - } - - // Recursively resolve children - if (Array.isArray(resolved.children)) { - resolved.children = resolved.children.map(child => resolveStringIndices(child)); - } - - return resolved; -} - -// Python color palette - cold to hot -const pythonColors = [ - "#fff4bf", // Coldest - light yellow (<1%) - "#ffec9e", // Cold - yellow (1-3%) - "#ffe47d", // Cool - golden yellow (3-6%) - "#ffdc5c", // Medium - golden (6-12%) - "#ffd43b", // Warm - Python gold (12-18%) - "#5592cc", // Hot - light blue (18-35%) - "#4584bb", // Very hot - medium blue (35-60%) - "#3776ab", // Hottest - Python blue (≥60%) -]; - -function ensureLibraryLoaded() { - if (typeof flamegraph === "undefined") { - console.error("d3-flame-graph library not loaded"); - document.getElementById("chart").innerHTML = - '

Error: d3-flame-graph library failed to load

'; - throw new Error("d3-flame-graph library failed to load"); - } -} - -function createPythonTooltip(data) { - const pythonTooltip = flamegraph.tooltip.defaultFlamegraphTooltip(); - pythonTooltip.show = function (d, element) { - if (!this._tooltip) { - this._tooltip = d3 - .select("body") - .append("div") - .attr("class", "python-tooltip") - .style("position", "absolute") - .style("padding", "20px") - .style("background", "white") - .style("color", "#2e3338") - .style("border-radius", "8px") - .style("font-size", "14px") - .style("border", "1px solid #e9ecef") - .style("box-shadow", "0 8px 30px rgba(0, 0, 0, 0.15)") - .style("z-index", "1000") - .style("pointer-events", "none") - .style("font-weight", "400") - .style("line-height", "1.5") - .style("max-width", "500px") - .style("word-wrap", "break-word") - .style("overflow-wrap", "break-word") - .style("font-family", "'Source Sans Pro', sans-serif") - .style("opacity", 0); - } - - const timeMs = (d.data.value / 1000).toFixed(2); - const percentage = ((d.data.value / data.value) * 100).toFixed(2); - const calls = d.data.calls || 0; - const childCount = d.children ? d.children.length : 0; - const source = d.data.source; - - // Create source code section if available - let sourceSection = ""; - if (source && Array.isArray(source) && source.length > 0) { - const sourceLines = source - .map( - (line) => - `
${line - .replace(/&/g, "&") - .replace(//g, ">")}
`, - ) - .join(""); - - sourceSection = ` -
-
- Source Code: -
-
- ${sourceLines} -
-
`; - } else if (source) { - // Show debug info if source exists but isn't an array - sourceSection = ` -
-
- [Debug] - Source data type: ${typeof source} -
-
- ${JSON.stringify(source, null, 2)} -
-
`; - } - - // Resolve strings for display - const funcname = resolveString(d.data.funcname) || resolveString(d.data.name); - const filename = resolveString(d.data.filename) || ""; - - const tooltipHTML = ` -
-
- ${funcname} -
-
- ${filename}${d.data.lineno ? ":" + d.data.lineno : ""} -
-
- Execution Time: - ${timeMs} ms - - Percentage: - ${percentage}% - - ${calls > 0 ? ` - Function Calls: - ${calls.toLocaleString()} - ` : ''} - - ${childCount > 0 ? ` - Child Functions: - ${childCount} - ` : ''} -
- ${sourceSection} -
- ${childCount > 0 ? - "Click to focus on this function" : - "Leaf function - no children"} -
-
- `; - - // Get mouse position - const event = d3.event || window.event; - const mouseX = event.pageX || event.clientX; - const mouseY = event.pageY || event.clientY; - - // Calculate tooltip dimensions (default to 320px width if not rendered yet) - let tooltipWidth = 320; - let tooltipHeight = 200; - if (this._tooltip && this._tooltip.node()) { - const node = this._tooltip - .style("opacity", 0) - .style("display", "block") - .node(); - tooltipWidth = node.offsetWidth || 320; - tooltipHeight = node.offsetHeight || 200; - this._tooltip.style("display", null); - } - - // Calculate horizontal position: if overflow, show to the left of cursor - const padding = 10; - const rightEdge = mouseX + padding + tooltipWidth; - const viewportWidth = window.innerWidth; - let left; - if (rightEdge > viewportWidth) { - left = mouseX - tooltipWidth - padding; - if (left < 0) left = padding; // prevent off left edge - } else { - left = mouseX + padding; - } - - // Calculate vertical position: if overflow, show above cursor - const bottomEdge = mouseY + padding + tooltipHeight; - const viewportHeight = window.innerHeight; - let top; - if (bottomEdge > viewportHeight) { - top = mouseY - tooltipHeight - padding; - if (top < 0) top = padding; // prevent off top edge - } else { - top = mouseY + padding; - } - - this._tooltip - .html(tooltipHTML) - .style("left", left + "px") - .style("top", top + "px") - .transition() - .duration(200) - .style("opacity", 1); - }; - - // Override the hide method - pythonTooltip.hide = function () { - if (this._tooltip) { - this._tooltip.transition().duration(200).style("opacity", 0); - } - }; - return pythonTooltip; -} - -function createFlamegraph(tooltip, rootValue) { - let chart = flamegraph() - .width(window.innerWidth - 80) - .cellHeight(20) - .transitionDuration(300) - .minFrameSize(1) - .tooltip(tooltip) - .inverted(true) - .setColorMapper(function (d) { - const percentage = d.data.value / rootValue; - let colorIndex; - if (percentage >= 0.6) colorIndex = 7; - else if (percentage >= 0.35) colorIndex = 6; - else if (percentage >= 0.18) colorIndex = 5; - else if (percentage >= 0.12) colorIndex = 4; - else if (percentage >= 0.06) colorIndex = 3; - else if (percentage >= 0.03) colorIndex = 2; - else if (percentage >= 0.01) colorIndex = 1; - else colorIndex = 0; // <1% - return pythonColors[colorIndex]; - }); - return chart; -} - -function renderFlamegraph(chart, data) { - d3.select("#chart").datum(data).call(chart); - window.flamegraphChart = chart; // for controls - window.flamegraphData = data; // for resize/search - populateStats(data); -} - -function attachPanelControls() { - const infoBtn = document.getElementById("show-info-btn"); - const infoPanel = document.getElementById("info-panel"); - const closeBtn = document.getElementById("close-info-btn"); - if (infoBtn && infoPanel) { - infoBtn.addEventListener("click", function () { - const isOpen = infoPanel.style.display === "block"; - infoPanel.style.display = isOpen ? "none" : "block"; - }); - } - if (closeBtn && infoPanel) { - closeBtn.addEventListener("click", function () { - infoPanel.style.display = "none"; - }); - } -} - -function updateSearchHighlight(searchTerm, searchInput) { - d3.selectAll("#chart rect") - .style("stroke", null) - .style("stroke-width", null) - .style("opacity", null); - if (searchTerm && searchTerm.length > 0) { - d3.selectAll("#chart rect").style("opacity", 0.3); - let matchCount = 0; - d3.selectAll("#chart rect").each(function (d) { - if (d && d.data) { - const name = resolveString(d.data.name) || ""; - const funcname = resolveString(d.data.funcname) || ""; - const filename = resolveString(d.data.filename) || ""; - const term = searchTerm.toLowerCase(); - const matches = - name.toLowerCase().includes(term) || - funcname.toLowerCase().includes(term) || - filename.toLowerCase().includes(term); - if (matches) { - matchCount++; - d3.select(this) - .style("opacity", 1) - .style("stroke", "#ff6b35") - .style("stroke-width", "2px") - .style("stroke-dasharray", "3,3"); - } - } - }); - if (searchInput) { - if (matchCount > 0) { - searchInput.style.borderColor = "rgba(40, 167, 69, 0.8)"; - searchInput.style.boxShadow = "0 6px 20px rgba(40, 167, 69, 0.2)"; - } else { - searchInput.style.borderColor = "rgba(220, 53, 69, 0.8)"; - searchInput.style.boxShadow = "0 6px 20px rgba(220, 53, 69, 0.2)"; - } - } - } else if (searchInput) { - searchInput.style.borderColor = "rgba(255, 255, 255, 0.2)"; - searchInput.style.boxShadow = "0 4px 12px rgba(0, 0, 0, 0.1)"; - } -} - -function initSearchHandlers() { - const searchInput = document.getElementById("search-input"); - if (!searchInput) return; - let searchTimeout; - function performSearch() { - const term = searchInput.value.trim(); - updateSearchHighlight(term, searchInput); - } - searchInput.addEventListener("input", function () { - clearTimeout(searchTimeout); - searchTimeout = setTimeout(performSearch, 150); - }); - window.performSearch = performSearch; -} - -function handleResize(chart, data) { - window.addEventListener("resize", function () { - if (chart && data) { - const newWidth = window.innerWidth - 80; - chart.width(newWidth); - d3.select("#chart").datum(data).call(chart); - } - }); -} - -function initFlamegraph() { - ensureLibraryLoaded(); - - // Extract string table if present and resolve string indices - let processedData = EMBEDDED_DATA; - if (EMBEDDED_DATA.strings) { - stringTable = EMBEDDED_DATA.strings; - processedData = resolveStringIndices(EMBEDDED_DATA); - } - - // Store original data for filtering - originalData = processedData; - - // Initialize thread filter dropdown - initThreadFilter(processedData); - - const tooltip = createPythonTooltip(processedData); - const chart = createFlamegraph(tooltip, processedData.value); - renderFlamegraph(chart, processedData); - attachPanelControls(); - initSearchHandlers(); - handleResize(chart, processedData); -} - -if (document.readyState === "loading") { - document.addEventListener("DOMContentLoaded", initFlamegraph); -} else { - initFlamegraph(); -} - -function populateStats(data) { - const totalSamples = data.value || 0; - - // Collect all functions with their metrics, aggregated by function name - const functionMap = new Map(); - - function collectFunctions(node) { - if (!node) return; - - let filename = typeof node.filename === 'number' ? resolveString(node.filename) : node.filename; - let funcname = typeof node.funcname === 'number' ? resolveString(node.funcname) : node.funcname; - - if (!filename || !funcname) { - const nameStr = typeof node.name === 'number' ? resolveString(node.name) : node.name; - if (nameStr?.includes('(')) { - const match = nameStr.match(/^(.+?)\s*\((.+?):(\d+)\)$/); - if (match) { - funcname = funcname || match[1]; - filename = filename || match[2]; - } - } - } - - filename = filename || 'unknown'; - funcname = funcname || 'unknown'; - - if (filename !== 'unknown' && funcname !== 'unknown' && node.value > 0) { - // Calculate direct samples (this node's value minus children's values) - let childrenValue = 0; - if (node.children) { - childrenValue = node.children.reduce((sum, child) => sum + child.value, 0); - } - const directSamples = Math.max(0, node.value - childrenValue); - - // Use file:line:funcname as key to ensure uniqueness - const funcKey = `${filename}:${node.lineno || '?'}:${funcname}`; - - if (functionMap.has(funcKey)) { - const existing = functionMap.get(funcKey); - existing.directSamples += directSamples; - existing.directPercent = (existing.directSamples / totalSamples) * 100; - // Keep the most representative file/line (the one with more samples) - if (directSamples > existing.maxSingleSamples) { - existing.filename = filename; - existing.lineno = node.lineno || '?'; - existing.maxSingleSamples = directSamples; - } - } else { - functionMap.set(funcKey, { - filename: filename, - lineno: node.lineno || '?', - funcname: funcname, - directSamples, - directPercent: (directSamples / totalSamples) * 100, - maxSingleSamples: directSamples - }); - } - } - - if (node.children) { - node.children.forEach(child => collectFunctions(child)); - } - } - - collectFunctions(data); - - // Convert map to array and get top 3 hotspots - const hotSpots = Array.from(functionMap.values()) - .filter(f => f.directPercent > 0.5) // At least 0.5% to be significant - .sort((a, b) => b.directPercent - a.directPercent) - .slice(0, 3); - - // Populate the 3 cards - for (let i = 0; i < 3; i++) { - const num = i + 1; - if (i < hotSpots.length && hotSpots[i]) { - const hotspot = hotSpots[i]; - const filename = hotspot.filename || 'unknown'; - const basename = filename !== 'unknown' ? filename.split('/').pop() : 'unknown'; - const lineno = hotspot.lineno ?? '?'; - let funcDisplay = hotspot.funcname || 'unknown'; - if (funcDisplay.length > 35) { - funcDisplay = funcDisplay.substring(0, 32) + '...'; - } - - document.getElementById(`hotspot-file-${num}`).textContent = `${basename}:${lineno}`; - document.getElementById(`hotspot-func-${num}`).textContent = funcDisplay; - document.getElementById(`hotspot-detail-${num}`).textContent = `${hotspot.directPercent.toFixed(1)}% samples (${hotspot.directSamples.toLocaleString()})`; - } else { - document.getElementById(`hotspot-file-${num}`).textContent = '--'; - document.getElementById(`hotspot-func-${num}`).textContent = '--'; - document.getElementById(`hotspot-detail-${num}`).textContent = '--'; - } - } -} - -// Control functions -function resetZoom() { - if (window.flamegraphChart) { - window.flamegraphChart.resetZoom(); - } -} - -function exportSVG() { - const svgElement = document.querySelector("#chart svg"); - if (svgElement) { - const serializer = new XMLSerializer(); - const svgString = serializer.serializeToString(svgElement); - const blob = new Blob([svgString], { type: "image/svg+xml" }); - const url = URL.createObjectURL(blob); - const a = document.createElement("a"); - a.href = url; - a.download = "python-performance-flamegraph.svg"; - a.click(); - URL.revokeObjectURL(url); - } -} - -function toggleLegend() { - const legendPanel = document.getElementById("legend-panel"); - const isHidden = - legendPanel.style.display === "none" || legendPanel.style.display === ""; - legendPanel.style.display = isHidden ? "block" : "none"; -} - -function clearSearch() { - const searchInput = document.getElementById("search-input"); - if (searchInput) { - searchInput.value = ""; - if (window.flamegraphChart) { - window.flamegraphChart.clear(); - } - } -} - -function initThreadFilter(data) { - const threadFilter = document.getElementById('thread-filter'); - const threadWrapper = document.querySelector('.thread-filter-wrapper'); - - if (!threadFilter || !data.threads) { - return; - } - - // Clear existing options except "All Threads" - threadFilter.innerHTML = ''; - - // Add thread options - const threads = data.threads || []; - threads.forEach(threadId => { - const option = document.createElement('option'); - option.value = threadId; - option.textContent = `Thread ${threadId}`; - threadFilter.appendChild(option); - }); - - // Show filter if more than one thread - if (threads.length > 1 && threadWrapper) { - threadWrapper.style.display = 'inline-flex'; - } -} - -function filterByThread() { - const threadFilter = document.getElementById('thread-filter'); - if (!threadFilter || !originalData) return; - - const selectedThread = threadFilter.value; - currentThreadFilter = selectedThread; - - let filteredData; - if (selectedThread === 'all') { - // Show all data - filteredData = originalData; - } else { - // Filter data by thread - const threadId = parseInt(selectedThread); - filteredData = filterDataByThread(originalData, threadId); - - if (filteredData.strings) { - stringTable = filteredData.strings; - filteredData = resolveStringIndices(filteredData); - } - } - - // Re-render flamegraph with filtered data - const tooltip = createPythonTooltip(filteredData); - const chart = createFlamegraph(tooltip, filteredData.value); - renderFlamegraph(chart, filteredData); -} - -function filterDataByThread(data, threadId) { - function filterNode(node) { - if (!node.threads || !node.threads.includes(threadId)) { - return null; - } - - const filteredNode = { - ...node, - children: [] - }; - - if (node.children && Array.isArray(node.children)) { - filteredNode.children = node.children - .map(child => filterNode(child)) - .filter(child => child !== null); - } - - return filteredNode; - } - - const filteredRoot = { - ...data, - children: [] - }; - - if (data.children && Array.isArray(data.children)) { - filteredRoot.children = data.children - .map(child => filterNode(child)) - .filter(child => child !== null); - } - - function recalculateValue(node) { - if (!node.children || node.children.length === 0) { - return node.value || 0; - } - const childrenValue = node.children.reduce((sum, child) => sum + recalculateValue(child), 0); - node.value = Math.max(node.value || 0, childrenValue); - return node.value; - } - - recalculateValue(filteredRoot); - - return filteredRoot; -} - diff --git a/Lib/profiling/sampling/flamegraph_template.html b/Lib/profiling/sampling/flamegraph_template.html deleted file mode 100644 index 585a1abb61f..00000000000 --- a/Lib/profiling/sampling/flamegraph_template.html +++ /dev/null @@ -1,153 +0,0 @@ - - - - - - Python Performance Flamegraph - - - - - - - -
-
- -
-

Tachyon Profiler Performance Flamegraph

-
- Interactive visualization of function call performance -
-
- -
-
- -
-
-
-
🥇
-
-
#1 Hot Spot
-
--
-
--
-
--
-
-
-
-
🥈
-
-
#2 Hot Spot
-
--
-
--
-
--
-
-
-
-
🥉
-
-
#3 Hot Spot
-
--
-
--
-
--
-
-
-
-
- -
-
- - - -
- - -
-
-
- - - -
- -

Navigation Guide

-

Click: Zoom into function

-

Hover: Show detailed information

-

Width: Time spent in function

-

Height: Call stack depth

-

Color: Performance intensity

-
- -
-

🔥 Performance Heat Map

-
-
-
-
Hottest Functions (≥60%)
-
Highest performance impact
-
-
-
-
-
-
Very Hot Functions (35-60%)
-
High performance impact
-
-
-
-
-
-
Hot Functions (18-35%)
-
Notable performance cost
-
-
-
-
-
-
Warm Functions (12-18%)
-
Moderate impact
-
-
-
-
-
-
Medium Functions (6-12%)
-
Some performance impact
-
-
-
-
-
-
Cool Functions (3-6%)
-
Low performance impact
-
-
-
-
-
-
Cold Functions (1-3%)
-
Minimal performance impact
-
-
-
-
-
-
Coldest Functions (<1%)
-
Very low performance impact
-
-
-
- -
-
-
- - - - diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py index 548acbf24b7..921cd625f04 100644 --- a/Lib/profiling/sampling/gecko_collector.py +++ b/Lib/profiling/sampling/gecko_collector.py @@ -1,9 +1,20 @@ +import itertools import json import os import platform +import sys +import threading import time -from .collector import Collector, THREAD_STATE_RUNNING +from .collector import Collector +try: + from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED +except ImportError: + # Fallback if module not available (shouldn't happen in normal use) + THREAD_STATUS_HAS_GIL = (1 << 0) + THREAD_STATUS_ON_CPU = (1 << 1) + THREAD_STATUS_UNKNOWN = (1 << 2) + THREAD_STATUS_GIL_REQUESTED = (1 << 3) # Categories matching Firefox Profiler expectations @@ -11,14 +22,20 @@ {"name": "Other", "color": "grey", "subcategories": ["Other"]}, {"name": "Python", "color": "yellow", "subcategories": ["Other"]}, {"name": "Native", "color": "blue", "subcategories": ["Other"]}, - {"name": "Idle", "color": "transparent", "subcategories": ["Other"]}, + {"name": "GC", "color": "orange", "subcategories": ["Other"]}, + {"name": "GIL", "color": "green", "subcategories": ["Other"]}, + {"name": "CPU", "color": "purple", "subcategories": ["Other"]}, + {"name": "Code Type", "color": "red", "subcategories": ["Other"]}, ] # Category indices CATEGORY_OTHER = 0 CATEGORY_PYTHON = 1 CATEGORY_NATIVE = 2 -CATEGORY_IDLE = 3 +CATEGORY_GC = 3 +CATEGORY_GIL = 4 +CATEGORY_CPU = 5 +CATEGORY_CODE_TYPE = 6 # Subcategory indices DEFAULT_SUBCATEGORY = 0 @@ -39,7 +56,8 @@ class GeckoCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle self.start_time = time.time() * 1000 # milliseconds since epoch @@ -58,6 +76,56 @@ def __init__(self, *, skip_idle=False): self.last_sample_time = 0 self.interval = 1.0 # Will be calculated from actual sampling + # State tracking for interval markers (tid -> start_time) + self.has_gil_start = {} # Thread has the GIL + self.no_gil_start = {} # Thread doesn't have the GIL + self.on_cpu_start = {} # Thread is running on CPU + self.off_cpu_start = {} # Thread is off CPU + self.python_code_start = {} # Thread running Python code (has GIL) + self.native_code_start = {} # Thread running native code (on CPU without GIL) + self.gil_wait_start = {} # Thread waiting for GIL + + # GC event tracking: track GC start time per thread + self.gc_start_per_thread = {} # tid -> start_time + + # Track which threads have been initialized for state tracking + self.initialized_threads = set() + + def _track_state_transition(self, tid, condition, active_dict, inactive_dict, + active_name, inactive_name, category, current_time): + """Track binary state transitions and emit markers. + + Args: + tid: Thread ID + condition: Whether the active state is true + active_dict: Dict tracking start time of active state + inactive_dict: Dict tracking start time of inactive state + active_name: Name for active state marker + inactive_name: Name for inactive state marker + category: Gecko category for the markers + current_time: Current timestamp + """ + # On first observation of a thread, just record the current state + # without creating a marker (we don't know what the previous state was) + if tid not in self.initialized_threads: + if condition: + active_dict[tid] = current_time + else: + inactive_dict[tid] = current_time + return + + # For already-initialized threads, track transitions + if condition: + active_dict.setdefault(tid, current_time) + if tid in inactive_dict: + self._add_marker(tid, inactive_name, inactive_dict.pop(tid), + current_time, category) + else: + inactive_dict.setdefault(tid, current_time) + if tid in active_dict: + self._add_marker(tid, active_name, active_dict.pop(tid), + current_time, category) + def collect(self, stack_frames): """Collect a sample from stack frames.""" current_time = (time.time() * 1000) - self.start_time @@ -69,18 +137,10 @@ def collect(self, stack_frames): ) / self.sample_count self.last_sample_time = current_time + # Process threads and track GC per thread for interpreter_info in stack_frames: for thread_info in interpreter_info.threads: - if ( - self.skip_idle - and thread_info.status != THREAD_STATE_RUNNING - ): - continue - frames = thread_info.frame_info - if not frames: - continue - tid = thread_info.thread_id # Initialize thread if needed @@ -89,6 +149,80 @@ def collect(self, stack_frames): thread_data = self.threads[tid] + # Decode status flags + status_flags = thread_info.status + has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) + on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) + gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED) + + # Track GIL possession (Has GIL / No GIL) + self._track_state_transition( + tid, has_gil, self.has_gil_start, self.no_gil_start, + "Has GIL", "No GIL", CATEGORY_GIL, current_time + ) + + # Track CPU state (On CPU / Off CPU) + self._track_state_transition( + tid, on_cpu, self.on_cpu_start, self.off_cpu_start, + "On CPU", "Off CPU", CATEGORY_CPU, current_time + ) + + # Track code type (Python Code / Native Code) + # This is tri-state: Python (has_gil), Native (on_cpu without gil), or Neither + if has_gil: + self._track_state_transition( + tid, True, self.python_code_start, self.native_code_start, + "Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time + ) + elif on_cpu: + self._track_state_transition( + tid, True, self.native_code_start, self.python_code_start, + "Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time + ) + else: + # Thread is idle (neither has GIL nor on CPU) - close any open code markers + # This handles the third state that _track_state_transition doesn't cover + if tid in self.initialized_threads: + if tid in self.python_code_start: + self._add_marker(tid, "Python Code", self.python_code_start.pop(tid), + current_time, CATEGORY_CODE_TYPE) + if tid in self.native_code_start: + self._add_marker(tid, "Native Code", self.native_code_start.pop(tid), + current_time, CATEGORY_CODE_TYPE) + + # Track "Waiting for GIL" intervals (one-sided tracking) + if gil_requested: + self.gil_wait_start.setdefault(tid, current_time) + elif tid in self.gil_wait_start: + self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid), + current_time, CATEGORY_GIL) + + # Track GC events by detecting frames in the stack trace + # This leverages the improved GC frame tracking from commit 336366fd7ca + # which precisely identifies the thread that initiated GC collection + has_gc_frame = any(frame[2] == "" for frame in frames) + if has_gc_frame: + # This thread initiated GC collection + if tid not in self.gc_start_per_thread: + self.gc_start_per_thread[tid] = current_time + elif tid in self.gc_start_per_thread: + # End GC marker when no more GC frames are detected + self._add_marker(tid, "GC Collecting", self.gc_start_per_thread.pop(tid), + current_time, CATEGORY_GC) + + # Mark thread as initialized after processing all state transitions + self.initialized_threads.add(tid) + + # Categorize: idle if neither has GIL nor on CPU + is_idle = not has_gil and not on_cpu + + # Skip idle threads if skip_idle is enabled + if self.skip_idle and is_idle: + continue + + if not frames: + continue + # Process the stack stack_index = self._process_stack(thread_data, frames) @@ -102,7 +236,6 @@ def collect(self, stack_frames): def _create_thread(self, tid): """Create a new thread structure with processed profile format.""" - import threading # Determine if this is the main thread try: @@ -181,7 +314,7 @@ def _create_thread(self, tid): "functionSize": [], "length": 0, }, - # Markers - processed format + # Markers - processed format (arrays) "markers": { "data": [], "name": [], @@ -215,6 +348,27 @@ def _intern_string(self, s): self.global_string_map[s] = idx return idx + def _add_marker(self, tid, name, start_time, end_time, category): + """Add an interval marker for a specific thread.""" + if tid not in self.threads: + return + + thread_data = self.threads[tid] + duration = end_time - start_time + + name_idx = self._intern_string(name) + markers = thread_data["markers"] + markers["name"].append(name_idx) + markers["startTime"].append(start_time) + markers["endTime"].append(end_time) + markers["phase"].append(1) # 1 = interval marker + markers["category"].append(category) + markers["data"].append({ + "type": name.replace(" ", ""), + "duration": duration, + "tid": tid + }) + def _process_stack(self, thread_data, frames): """Process a stack and return the stack index.""" if not frames: @@ -383,15 +537,63 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno): frame_cache[frame_key] = frame_idx return frame_idx + def _finalize_markers(self): + """Close any open markers at the end of profiling.""" + end_time = self.last_sample_time + + # Close all open markers for each thread using a generic approach + marker_states = [ + (self.has_gil_start, "Has GIL", CATEGORY_GIL), + (self.no_gil_start, "No GIL", CATEGORY_GIL), + (self.on_cpu_start, "On CPU", CATEGORY_CPU), + (self.off_cpu_start, "Off CPU", CATEGORY_CPU), + (self.python_code_start, "Python Code", CATEGORY_CODE_TYPE), + (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE), + (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL), + (self.gc_start_per_thread, "GC Collecting", CATEGORY_GC), + ] + + for state_dict, marker_name, category in marker_states: + for tid in list(state_dict.keys()): + self._add_marker(tid, marker_name, state_dict[tid], end_time, category) + del state_dict[tid] + def export(self, filename): """Export the profile to a Gecko JSON file.""" + if self.sample_count > 0 and self.last_sample_time > 0: self.interval = self.last_sample_time / self.sample_count - profile = self._build_profile() + # Spinner for progress indication + spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']) + stop_spinner = threading.Event() - with open(filename, "w") as f: - json.dump(profile, f, separators=(",", ":")) + def spin(): + message = 'Building Gecko profile...' + while not stop_spinner.is_set(): + sys.stderr.write(f'\r{next(spinner)} {message}') + sys.stderr.flush() + time.sleep(0.1) + # Clear the spinner line + sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r') + sys.stderr.flush() + + spinner_thread = threading.Thread(target=spin, daemon=True) + spinner_thread.start() + + try: + # Finalize any open markers before building profile + self._finalize_markers() + + profile = self._build_profile() + + with open(filename, "w") as f: + json.dump(profile, f, separators=(",", ":")) + finally: + stop_spinner.set() + spinner_thread.join(timeout=1.0) + # Small delay to ensure the clear happens + time.sleep(0.01) print(f"Gecko profile written to {filename}") print( @@ -416,6 +618,7 @@ def _build_profile(self): frame_table["length"] = len(frame_table["func"]) func_table["length"] = len(func_table["name"]) resource_table["length"] = len(resource_table["name"]) + thread_data["markers"]["length"] = len(thread_data["markers"]["name"]) # Clean up internal caches del thread_data["_stackCache"] diff --git a/Lib/profiling/sampling/heatmap_collector.py b/Lib/profiling/sampling/heatmap_collector.py new file mode 100644 index 00000000000..eb128aba9b1 --- /dev/null +++ b/Lib/profiling/sampling/heatmap_collector.py @@ -0,0 +1,1039 @@ +"""Heatmap collector for Python profiling with line-level execution heat visualization.""" + +import base64 +import collections +import html +import importlib.resources +import json +import os +import platform +import site +import sys +from dataclasses import dataclass, field +from pathlib import Path +from typing import Dict, List, Tuple + +from ._css_utils import get_combined_css +from .stack_collector import StackTraceCollector + + +# ============================================================================ +# Data Classes +# ============================================================================ + +@dataclass +class FileStats: + """Statistics for a single profiled file.""" + filename: str + module_name: str + module_type: str + total_samples: int + total_self_samples: int + num_lines: int + max_samples: int + max_self_samples: int + percentage: float = 0.0 + + +@dataclass +class TreeNode: + """Node in the hierarchical file tree structure.""" + files: List[FileStats] = field(default_factory=list) + samples: int = 0 + count: int = 0 + children: Dict[str, 'TreeNode'] = field(default_factory=dict) + + +@dataclass +class ColorGradient: + """Configuration for heatmap color gradient calculations.""" + # Color stops thresholds + stop_1: float = 0.2 # Blue to cyan transition + stop_2: float = 0.4 # Cyan to green transition + stop_3: float = 0.6 # Green to yellow transition + stop_4: float = 0.8 # Yellow to orange transition + stop_5: float = 1.0 # Orange to red transition + + # Alpha (opacity) values + alpha_very_cold: float = 0.3 + alpha_cold: float = 0.4 + alpha_medium: float = 0.5 + alpha_warm: float = 0.6 + alpha_hot_base: float = 0.7 + alpha_hot_range: float = 0.15 + + # Gradient multiplier + multiplier: int = 5 + + # Cache for calculated colors + cache: Dict[float, Tuple[int, int, int, float]] = field(default_factory=dict) + + +# ============================================================================ +# Module Path Analysis +# ============================================================================ + +def get_python_path_info(): + """Get information about Python installation paths for module extraction. + + Returns: + dict: Dictionary containing stdlib path, site-packages paths, and sys.path entries. + """ + info = { + 'stdlib': None, + 'site_packages': [], + 'sys_path': [] + } + + # Get standard library path from os module location + try: + if hasattr(os, '__file__') and os.__file__: + info['stdlib'] = Path(os.__file__).parent + except (AttributeError, OSError): + pass # Silently continue if we can't determine stdlib path + + # Get site-packages directories + site_packages = [] + try: + site_packages.extend(Path(p) for p in site.getsitepackages()) + except (AttributeError, OSError): + pass # Continue without site packages if unavailable + + # Get user site-packages + try: + user_site = site.getusersitepackages() + if user_site and Path(user_site).exists(): + site_packages.append(Path(user_site)) + except (AttributeError, OSError): + pass # Continue without user site packages + + info['site_packages'] = site_packages + info['sys_path'] = [Path(p) for p in sys.path if p] + + return info + + +def extract_module_name(filename, path_info): + """Extract Python module name and type from file path. + + Args: + filename: Path to the Python file + path_info: Dictionary from get_python_path_info() + + Returns: + tuple: (module_name, module_type) where module_type is one of: + 'stdlib', 'site-packages', 'project', or 'other' + """ + if not filename: + return ('unknown', 'other') + + try: + file_path = Path(filename) + except (ValueError, OSError): + return (str(filename), 'other') + + # Check if it's in stdlib + if path_info['stdlib'] and _is_subpath(file_path, path_info['stdlib']): + try: + rel_path = file_path.relative_to(path_info['stdlib']) + return (_path_to_module(rel_path), 'stdlib') + except ValueError: + pass + + # Check site-packages + for site_pkg in path_info['site_packages']: + if _is_subpath(file_path, site_pkg): + try: + rel_path = file_path.relative_to(site_pkg) + return (_path_to_module(rel_path), 'site-packages') + except ValueError: + continue + + # Check other sys.path entries (project files) + if not str(file_path).startswith(('<', '[')): # Skip special files + for path_entry in path_info['sys_path']: + if _is_subpath(file_path, path_entry): + try: + rel_path = file_path.relative_to(path_entry) + return (_path_to_module(rel_path), 'project') + except ValueError: + continue + + # Fallback: just use the filename + return (_path_to_module(file_path), 'other') + + +def _is_subpath(file_path, parent_path): + try: + file_path.relative_to(parent_path) + return True + except (ValueError, OSError): + return False + + +def _path_to_module(path): + if isinstance(path, str): + path = Path(path) + + # Remove .py extension + if path.suffix == '.py': + path = path.with_suffix('') + + # Convert path separators to dots + parts = path.parts + + # Handle __init__ files - they represent the package itself + if parts and parts[-1] == '__init__': + parts = parts[:-1] + + return '.'.join(parts) if parts else path.stem + + +# ============================================================================ +# Helper Classes +# ============================================================================ + +class _TemplateLoader: + """Loads and caches HTML/CSS/JS templates for heatmap generation.""" + + def __init__(self): + """Load all templates and assets once.""" + self.index_template = None + self.file_template = None + self.index_css = None + self.index_js = None + self.file_css = None + self.file_js = None + self.logo_html = None + + self._load_templates() + + def _load_templates(self): + """Load all template files from _heatmap_assets.""" + try: + template_dir = importlib.resources.files(__package__) + assets_dir = template_dir / "_heatmap_assets" + + # Load HTML templates + self.index_template = (assets_dir / "heatmap_index_template.html").read_text(encoding="utf-8") + self.file_template = (assets_dir / "heatmap_pyfile_template.html").read_text(encoding="utf-8") + + # Load CSS (same file used for both index and file pages) + css_content = get_combined_css("heatmap") + self.index_css = css_content + self.file_css = css_content + + # Load JS + self.index_js = (assets_dir / "heatmap_index.js").read_text(encoding="utf-8") + self.file_js = (assets_dir / "heatmap.js").read_text(encoding="utf-8") + + # Load Python logo + logo_dir = template_dir / "_assets" + try: + png_path = logo_dir / "python-logo-only.png" + b64_logo = base64.b64encode(png_path.read_bytes()).decode("ascii") + self.logo_html = f'' + except (FileNotFoundError, IOError) as e: + self.logo_html = '
' + print(f"Warning: Could not load Python logo: {e}") + + except (FileNotFoundError, IOError) as e: + raise RuntimeError(f"Failed to load heatmap template files: {e}") from e + + +class _TreeBuilder: + """Builds hierarchical tree structure from file statistics.""" + + @staticmethod + def build_file_tree(file_stats: List[FileStats]) -> Dict[str, TreeNode]: + """Build hierarchical tree grouped by module type, then by module structure. + + Args: + file_stats: List of FileStats objects + + Returns: + Dictionary mapping module types to their tree roots + """ + # Group by module type first + type_groups = {'stdlib': [], 'site-packages': [], 'project': [], 'other': []} + for stat in file_stats: + type_groups[stat.module_type].append(stat) + + # Build tree for each type + trees = {} + for module_type, stats in type_groups.items(): + if not stats: + continue + + root_node = TreeNode() + + for stat in stats: + module_name = stat.module_name + parts = module_name.split('.') + + # Navigate/create tree structure + current_node = root_node + for i, part in enumerate(parts): + if i == len(parts) - 1: + # Last part - store the file + current_node.files.append(stat) + else: + # Intermediate part - create or navigate + if part not in current_node.children: + current_node.children[part] = TreeNode() + current_node = current_node.children[part] + + # Calculate aggregate stats for this type's tree + _TreeBuilder._calculate_node_stats(root_node) + trees[module_type] = root_node + + return trees + + @staticmethod + def _calculate_node_stats(node: TreeNode) -> Tuple[int, int]: + """Recursively calculate aggregate statistics for tree nodes. + + Args: + node: TreeNode to calculate stats for + + Returns: + Tuple of (total_samples, file_count) + """ + total_samples = 0 + file_count = 0 + + # Count files at this level + for file_stat in node.files: + total_samples += file_stat.total_samples + file_count += 1 + + # Recursively process children + for child in node.children.values(): + child_samples, child_count = _TreeBuilder._calculate_node_stats(child) + total_samples += child_samples + file_count += child_count + + node.samples = total_samples + node.count = file_count + return total_samples, file_count + + +class _HtmlRenderer: + """Renders hierarchical tree structures as HTML.""" + + def __init__(self, file_index: Dict[str, str], color_gradient: ColorGradient, + calculate_intensity_color_func): + """Initialize renderer with file index and color calculation function. + + Args: + file_index: Mapping from filenames to HTML file names + color_gradient: ColorGradient configuration + calculate_intensity_color_func: Function to calculate colors + """ + self.file_index = file_index + self.color_gradient = color_gradient + self.calculate_intensity_color = calculate_intensity_color_func + self.heatmap_bar_height = 16 + + def render_hierarchical_html(self, trees: Dict[str, TreeNode]) -> str: + """Build hierarchical HTML with type sections and collapsible module folders. + + Args: + trees: Dictionary mapping module types to tree roots + + Returns: + Complete HTML string for all sections + """ + type_names = { + 'stdlib': '📚 Standard Library', + 'site-packages': '📦 Site Packages', + 'project': '🏗️ Project Files', + 'other': '📄 Other Files' + } + + sections = [] + for module_type in ['project', 'stdlib', 'site-packages', 'other']: + if module_type not in trees: + continue + + tree = trees[module_type] + + # Project starts expanded, others start collapsed + is_collapsed = module_type in {'stdlib', 'site-packages', 'other'} + icon = '▶' if is_collapsed else '▼' + content_style = ' style="display: none;"' if is_collapsed else '' + + section_html = f''' +
+
+ {icon} + {type_names[module_type]} + ({tree.count} files, {tree.samples:,} samples) +
+
+''' + + # Render root folders + root_folders = sorted(tree.children.items(), + key=lambda x: x[1].samples, reverse=True) + + for folder_name, folder_node in root_folders: + section_html += self._render_folder(folder_node, folder_name, level=1) + + # Render root files (files not in any module) + if tree.files: + sorted_files = sorted(tree.files, key=lambda x: x.total_samples, reverse=True) + section_html += '
\n' + for stat in sorted_files: + section_html += self._render_file_item(stat, indent=' ') + section_html += '
\n' + + section_html += '
\n
\n' + sections.append(section_html) + + return '\n'.join(sections) + + def _render_folder(self, node: TreeNode, name: str, level: int = 1) -> str: + """Render a single folder node recursively. + + Args: + node: TreeNode to render + name: Display name for the folder + level: Nesting level for indentation + + Returns: + HTML string for this folder and its contents + """ + indent = ' ' * level + parts = [] + + # Render folder header (collapsed by default) + parts.append(f'{indent}') + + return '\n'.join(parts) + + def _render_file_item(self, stat: FileStats, indent: str = '') -> str: + """Render a single file item with heatmap bar. + + Args: + stat: FileStats object + indent: Indentation string + + Returns: + HTML string for file item + """ + full_path = html.escape(stat.filename) + module_name = html.escape(stat.module_name) + + intensity = stat.percentage / 100.0 + r, g, b, alpha = self.calculate_intensity_color(intensity) + bg_color = f"rgba({r}, {g}, {b}, {alpha})" + bar_width = min(stat.percentage, 100) + + html_file = self.file_index[stat.filename] + + return (f'{indent}
\n' + f'{indent} 📄 {module_name}\n' + f'{indent} {stat.total_samples:,} samples\n' + f'{indent}
\n' + f'{indent}
\n') + + +# ============================================================================ +# Main Collector Class +# ============================================================================ + +class HeatmapCollector(StackTraceCollector): + """Collector that generates coverage.py-style heatmap HTML output with line intensity. + + This collector creates detailed HTML reports showing which lines of code + were executed most frequently during profiling, similar to coverage.py + but showing execution "heat" rather than just coverage. + """ + + # File naming and formatting constants + FILE_INDEX_FORMAT = "file_{:04d}.html" + + def __init__(self, *args, **kwargs): + """Initialize the heatmap collector with data structures for analysis.""" + super().__init__(*args, **kwargs) + + # Sample counting data structures + self.line_samples = collections.Counter() + self.file_samples = collections.defaultdict(collections.Counter) + self.line_self_samples = collections.Counter() + self.file_self_samples = collections.defaultdict(collections.Counter) + + # Call graph data structures for navigation + self.call_graph = collections.defaultdict(list) + self.callers_graph = collections.defaultdict(list) + self.function_definitions = {} + + # Edge counting for call path analysis + self.edge_samples = collections.Counter() + + # Statistics and metadata + self._total_samples = 0 + self._path_info = get_python_path_info() + self.stats = {} + + # Color gradient configuration + self._color_gradient = ColorGradient() + + # Template loader (loads all templates once) + self._template_loader = _TemplateLoader() + + # File index (populated during export) + self.file_index = {} + + @property + def _color_cache(self): + """Compatibility property for accessing color cache.""" + return self._color_gradient.cache + + def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None, missed_samples=None, **kwargs): + """Set profiling statistics to include in heatmap output. + + Args: + sample_interval_usec: Sampling interval in microseconds + duration_sec: Total profiling duration in seconds + sample_rate: Effective sampling rate + error_rate: Optional error rate during profiling + missed_samples: Optional percentage of missed samples + **kwargs: Additional statistics to include + """ + self.stats = { + "sample_interval_usec": sample_interval_usec, + "duration_sec": duration_sec, + "sample_rate": sample_rate, + "error_rate": error_rate, + "missed_samples": missed_samples, + "python_version": sys.version, + "python_implementation": platform.python_implementation(), + "platform": platform.platform(), + } + self.stats.update(kwargs) + + def process_frames(self, frames, thread_id): + """Process stack frames and count samples per line. + + Args: + frames: List of frame tuples (filename, lineno, funcname) + frames[0] is the leaf (top of stack, where execution is) + thread_id: Thread ID for this stack trace + """ + self._total_samples += 1 + + # Count each line in the stack and build call graph + for i, frame_info in enumerate(frames): + filename, lineno, funcname = frame_info + + if not self._is_valid_frame(filename, lineno): + continue + + # frames[0] is the leaf - where execution is actually happening + is_leaf = (i == 0) + self._record_line_sample(filename, lineno, funcname, is_leaf=is_leaf) + + # Build call graph for adjacent frames + if i + 1 < len(frames): + self._record_call_relationship(frames[i], frames[i + 1]) + + def _is_valid_frame(self, filename, lineno): + """Check if a frame should be included in the heatmap.""" + # Skip internal or invalid files + if not filename or filename.startswith('<') or filename.startswith('['): + return False + + # Skip invalid frames with corrupted filename data + if filename == "__init__" and lineno == 0: + return False + + return True + + def _record_line_sample(self, filename, lineno, funcname, is_leaf=False): + """Record a sample for a specific line.""" + # Track cumulative samples (all occurrences in stack) + self.line_samples[(filename, lineno)] += 1 + self.file_samples[filename][lineno] += 1 + + # Track self/leaf samples (only when at top of stack) + if is_leaf: + self.line_self_samples[(filename, lineno)] += 1 + self.file_self_samples[filename][lineno] += 1 + + # Record function definition location + if funcname and (filename, funcname) not in self.function_definitions: + self.function_definitions[(filename, funcname)] = lineno + + def _record_call_relationship(self, callee_frame, caller_frame): + """Record caller/callee relationship between adjacent frames.""" + callee_filename, callee_lineno, callee_funcname = callee_frame + caller_filename, caller_lineno, caller_funcname = caller_frame + + # Skip internal files for call graph + if callee_filename.startswith('<') or callee_filename.startswith('['): + return + + # Get the callee's function definition line + callee_def_line = self.function_definitions.get( + (callee_filename, callee_funcname), callee_lineno + ) + + # Record caller -> callee relationship + caller_key = (caller_filename, caller_lineno) + callee_info = (callee_filename, callee_def_line, callee_funcname) + if callee_info not in self.call_graph[caller_key]: + self.call_graph[caller_key].append(callee_info) + + # Record callee <- caller relationship + callee_key = (callee_filename, callee_def_line) + caller_info = (caller_filename, caller_lineno, caller_funcname) + if caller_info not in self.callers_graph[callee_key]: + self.callers_graph[callee_key].append(caller_info) + + # Count this call edge for path analysis + edge_key = (caller_key, callee_key) + self.edge_samples[edge_key] += 1 + + def export(self, output_path): + """Export heatmap data as HTML files in a directory. + + Args: + output_path: Path where to create the heatmap output directory + """ + if not self.file_samples: + print("Warning: No heatmap data to export") + return + + try: + output_dir = self._prepare_output_directory(output_path) + file_stats = self._calculate_file_stats() + self._create_file_index(file_stats) + + # Generate individual file reports + self._generate_file_reports(output_dir, file_stats) + + # Generate index page + self._generate_index_html(output_dir / 'index.html', file_stats) + + self._print_export_summary(output_dir, file_stats) + + except Exception as e: + print(f"Error: Failed to export heatmap: {e}") + raise + + def _prepare_output_directory(self, output_path): + """Create output directory for heatmap files.""" + output_dir = Path(output_path) + if output_dir.suffix == '.html': + output_dir = output_dir.with_suffix('') + + try: + output_dir.mkdir(exist_ok=True, parents=True) + except (IOError, OSError) as e: + raise RuntimeError(f"Failed to create output directory {output_dir}: {e}") from e + + return output_dir + + def _create_file_index(self, file_stats: List[FileStats]): + """Create mapping from filenames to HTML file names.""" + self.file_index = { + stat.filename: self.FILE_INDEX_FORMAT.format(i) + for i, stat in enumerate(file_stats) + } + + def _generate_file_reports(self, output_dir, file_stats: List[FileStats]): + """Generate HTML report for each source file.""" + for stat in file_stats: + file_path = output_dir / self.file_index[stat.filename] + line_counts = self.file_samples[stat.filename] + valid_line_counts = {line: count for line, count in line_counts.items() if line >= 0} + + self_counts = self.file_self_samples.get(stat.filename, {}) + valid_self_counts = {line: count for line, count in self_counts.items() if line >= 0} + + self._generate_file_html( + file_path, + stat.filename, + valid_line_counts, + valid_self_counts, + stat + ) + + def _print_export_summary(self, output_dir, file_stats: List[FileStats]): + """Print summary of exported heatmap.""" + print(f"Heatmap output written to {output_dir}/") + print(f" - Index: {output_dir / 'index.html'}") + print(f" - {len(file_stats)} source file(s) analyzed") + + def _calculate_file_stats(self) -> List[FileStats]: + """Calculate statistics for each file. + + Returns: + List of FileStats objects sorted by total samples + """ + file_stats = [] + for filename, line_counts in self.file_samples.items(): + # Skip special frames + if filename in ('~', '...', '.') or filename.startswith('<') or filename.startswith('['): + continue + + # Filter out lines with -1 (special frames) + valid_line_counts = {line: count for line, count in line_counts.items() if line >= 0} + if not valid_line_counts: + continue + + # Get self samples for this file + self_line_counts = self.file_self_samples.get(filename, {}) + valid_self_counts = {line: count for line, count in self_line_counts.items() if line >= 0} + + total_samples = sum(valid_line_counts.values()) + total_self_samples = sum(valid_self_counts.values()) + num_lines = len(valid_line_counts) + max_samples = max(valid_line_counts.values()) + max_self_samples = max(valid_self_counts.values()) if valid_self_counts else 0 + module_name, module_type = extract_module_name(filename, self._path_info) + + file_stats.append(FileStats( + filename=filename, + module_name=module_name, + module_type=module_type, + total_samples=total_samples, + total_self_samples=total_self_samples, + num_lines=num_lines, + max_samples=max_samples, + max_self_samples=max_self_samples, + percentage=0.0 + )) + + # Sort by total samples and calculate percentages + file_stats.sort(key=lambda x: x.total_samples, reverse=True) + if file_stats: + max_total = file_stats[0].total_samples + for stat in file_stats: + stat.percentage = (stat.total_samples / max_total * 100) if max_total > 0 else 0 + + return file_stats + + def _generate_index_html(self, index_path: Path, file_stats: List[FileStats]): + """Generate index.html with list of all profiled files.""" + # Build hierarchical tree + tree = _TreeBuilder.build_file_tree(file_stats) + + # Render tree as HTML + renderer = _HtmlRenderer(self.file_index, self._color_gradient, + self._calculate_intensity_color) + sections_html = renderer.render_hierarchical_html(tree) + + # Format error rate and missed samples with bar classes + error_rate = self.stats.get('error_rate') + if error_rate is not None: + error_rate_str = f"{error_rate:.1f}%" + error_rate_width = min(error_rate, 100) + # Determine bar color class based on rate + if error_rate < 5: + error_rate_class = "good" + elif error_rate < 15: + error_rate_class = "warning" + else: + error_rate_class = "error" + else: + error_rate_str = "N/A" + error_rate_width = 0 + error_rate_class = "good" + + missed_samples = self.stats.get('missed_samples') + if missed_samples is not None: + missed_samples_str = f"{missed_samples:.1f}%" + missed_samples_width = min(missed_samples, 100) + if missed_samples < 5: + missed_samples_class = "good" + elif missed_samples < 15: + missed_samples_class = "warning" + else: + missed_samples_class = "error" + else: + missed_samples_str = "N/A" + missed_samples_width = 0 + missed_samples_class = "good" + + # Populate template + replacements = { + "": f"", + "": f"", + "": self._template_loader.logo_html, + "": str(len(file_stats)), + "": f"{self._total_samples:,}", + "": f"{self.stats.get('duration_sec', 0):.1f}s", + "": f"{self.stats.get('sample_rate', 0):.1f}", + "": error_rate_str, + "": str(error_rate_width), + "": error_rate_class, + "": missed_samples_str, + "": str(missed_samples_width), + "": missed_samples_class, + "": sections_html, + } + + html_content = self._template_loader.index_template + for placeholder, value in replacements.items(): + html_content = html_content.replace(placeholder, value) + + try: + index_path.write_text(html_content, encoding='utf-8') + except (IOError, OSError) as e: + raise RuntimeError(f"Failed to write index file {index_path}: {e}") from e + + def _calculate_intensity_color(self, intensity: float) -> Tuple[int, int, int, float]: + """Calculate RGB color and alpha for given intensity (0-1 range). + + Returns (r, g, b, alpha) tuple representing the heatmap color gradient: + blue -> green -> yellow -> orange -> red + + Results are cached to improve performance. + """ + # Round to 3 decimal places for cache key + cache_key = round(intensity, 3) + if cache_key in self._color_gradient.cache: + return self._color_gradient.cache[cache_key] + + gradient = self._color_gradient + m = gradient.multiplier + + # Color stops with (threshold, rgb_func, alpha_func) + stops = [ + (gradient.stop_1, + lambda i: (0, int(150 * i * m), 255), + lambda i: gradient.alpha_very_cold), + (gradient.stop_2, + lambda i: (0, 255, int(255 * (1 - (i - gradient.stop_1) * m))), + lambda i: gradient.alpha_cold), + (gradient.stop_3, + lambda i: (int(255 * (i - gradient.stop_2) * m), 255, 0), + lambda i: gradient.alpha_medium), + (gradient.stop_4, + lambda i: (255, int(200 - 100 * (i - gradient.stop_3) * m), 0), + lambda i: gradient.alpha_warm), + (gradient.stop_5, + lambda i: (255, int(100 * (1 - (i - gradient.stop_4) * m)), 0), + lambda i: gradient.alpha_hot_base + gradient.alpha_hot_range * (i - gradient.stop_4) * m), + ] + + result = None + for threshold, rgb_func, alpha_func in stops: + if intensity < threshold or threshold == gradient.stop_5: + r, g, b = rgb_func(intensity) + result = (r, g, b, alpha_func(intensity)) + break + + # Fallback + if result is None: + result = (255, 0, 0, 0.75) + + # Cache the result + self._color_gradient.cache[cache_key] = result + return result + + def _generate_file_html(self, output_path: Path, filename: str, + line_counts: Dict[int, int], self_counts: Dict[int, int], + file_stat: FileStats): + """Generate HTML for a single source file with heatmap coloring.""" + # Read source file + try: + source_lines = Path(filename).read_text(encoding='utf-8', errors='replace').splitlines() + except (IOError, OSError) as e: + if not (filename.startswith('<') or filename.startswith('[') or + filename in ('~', '...', '.') or len(filename) < 2): + print(f"Warning: Could not read source file {filename}: {e}") + source_lines = [f"# Source file not available: {filename}"] + + # Generate HTML for each line + max_samples = max(line_counts.values()) if line_counts else 1 + max_self_samples = max(self_counts.values()) if self_counts else 1 + code_lines_html = [ + self._build_line_html(line_num, line_content, line_counts, self_counts, + max_samples, max_self_samples, filename) + for line_num, line_content in enumerate(source_lines, start=1) + ] + + # Populate template + replacements = { + "": html.escape(filename), + "": f"{file_stat.total_samples:,}", + "": f"{file_stat.total_self_samples:,}", + "": str(file_stat.num_lines), + "": f"{file_stat.percentage:.2f}", + "": str(file_stat.max_samples), + "": str(file_stat.max_self_samples), + "": ''.join(code_lines_html), + "": f"", + "": f"", + } + + html_content = self._template_loader.file_template + for placeholder, value in replacements.items(): + html_content = html_content.replace(placeholder, value) + + try: + output_path.write_text(html_content, encoding='utf-8') + except (IOError, OSError) as e: + raise RuntimeError(f"Failed to write file {output_path}: {e}") from e + + def _build_line_html(self, line_num: int, line_content: str, + line_counts: Dict[int, int], self_counts: Dict[int, int], + max_samples: int, max_self_samples: int, filename: str) -> str: + """Build HTML for a single line of source code.""" + cumulative_samples = line_counts.get(line_num, 0) + self_samples = self_counts.get(line_num, 0) + + # Calculate colors for both self and cumulative modes + if cumulative_samples > 0: + cumulative_intensity = cumulative_samples / max_samples if max_samples > 0 else 0 + self_intensity = self_samples / max_self_samples if max_self_samples > 0 and self_samples > 0 else 0 + + # Default to self-based coloring + intensity = self_intensity if self_samples > 0 else cumulative_intensity + r, g, b, alpha = self._calculate_intensity_color(intensity) + bg_color = f"rgba({r}, {g}, {b}, {alpha})" + + # Pre-calculate colors for both modes (for JS toggle) + self_bg_color = self._format_color_for_intensity(self_intensity) if self_samples > 0 else "transparent" + cumulative_bg_color = self._format_color_for_intensity(cumulative_intensity) + + self_display = f"{self_samples:,}" if self_samples > 0 else "" + cumulative_display = f"{cumulative_samples:,}" + tooltip = f"Self: {self_samples:,}, Total: {cumulative_samples:,}" + else: + bg_color = "transparent" + self_bg_color = "transparent" + cumulative_bg_color = "transparent" + self_display = "" + cumulative_display = "" + tooltip = "" + + # Get navigation buttons + nav_buttons_html = self._build_navigation_buttons(filename, line_num) + + # Build line HTML + line_html = html.escape(line_content.rstrip('\n')) + title_attr = f' title="{html.escape(tooltip)}"' if tooltip else "" + + return ( + f'
\n' + f'
{line_num}
\n' + f'
{self_display}
\n' + f'
{cumulative_display}
\n' + f'
{line_html}
\n' + f' {nav_buttons_html}\n' + f'
\n' + ) + + def _format_color_for_intensity(self, intensity: float) -> str: + """Format color as rgba() string for given intensity.""" + r, g, b, alpha = self._calculate_intensity_color(intensity) + return f"rgba({r}, {g}, {b}, {alpha})" + + def _build_navigation_buttons(self, filename: str, line_num: int) -> str: + """Build navigation buttons for callers/callees.""" + line_key = (filename, line_num) + caller_list = self._deduplicate_by_function(self.callers_graph.get(line_key, [])) + callee_list = self._deduplicate_by_function(self.call_graph.get(line_key, [])) + + # Get edge counts for each caller/callee + callers_with_counts = self._get_edge_counts(line_key, caller_list, is_caller=True) + callees_with_counts = self._get_edge_counts(line_key, callee_list, is_caller=False) + + # Build navigation buttons with counts + caller_btn = self._create_navigation_button(callers_with_counts, 'caller', '▲') + callee_btn = self._create_navigation_button(callees_with_counts, 'callee', '▼') + + if caller_btn or callee_btn: + return f'
{caller_btn}{callee_btn}
' + return '' + + def _get_edge_counts(self, line_key: Tuple[str, int], + items: List[Tuple[str, int, str]], + is_caller: bool) -> List[Tuple[str, int, str, int]]: + """Get sample counts for each caller/callee edge.""" + result = [] + for file, line, func in items: + edge_line_key = (file, line) + if is_caller: + edge_key = (edge_line_key, line_key) + else: + edge_key = (line_key, edge_line_key) + + count = self.edge_samples.get(edge_key, 0) + result.append((file, line, func, count)) + + result.sort(key=lambda x: x[3], reverse=True) + return result + + def _deduplicate_by_function(self, items: List[Tuple[str, int, str]]) -> List[Tuple[str, int, str]]: + """Remove duplicate entries based on (file, function) key.""" + seen = {} + result = [] + for file, line, func in items: + key = (file, func) + if key not in seen: + seen[key] = True + result.append((file, line, func)) + return result + + def _create_navigation_button(self, items_with_counts: List[Tuple[str, int, str, int]], + btn_class: str, arrow: str) -> str: + """Create HTML for a navigation button with sample counts.""" + # Filter valid items + valid_items = [(f, l, fn, cnt) for f, l, fn, cnt in items_with_counts + if f in self.file_index and l > 0] + if not valid_items: + return "" + + if len(valid_items) == 1: + file, line, func, count = valid_items[0] + target_html = self.file_index[file] + nav_data = json.dumps({'link': f"{target_html}#line-{line}", 'func': func}) + title = f"Go to {btn_class}: {html.escape(func)} ({count:,} samples)" + return f'' + + # Multiple items - create menu + total_samples = sum(cnt for _, _, _, cnt in valid_items) + items_data = [ + { + 'file': os.path.basename(file), + 'func': func, + 'count': count, + 'link': f"{self.file_index[file]}#line-{line}" + } + for file, line, func, count in valid_items + ] + items_json = html.escape(json.dumps(items_data)) + title = f"{len(items_data)} {btn_class}s ({total_samples:,} samples)" + return f'' diff --git a/Lib/profiling/sampling/live_collector/__init__.py b/Lib/profiling/sampling/live_collector/__init__.py new file mode 100644 index 00000000000..175e4610d23 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/__init__.py @@ -0,0 +1,200 @@ +"""Live profiling collector that displays top-like statistics using curses. + + ┌─────────────────────────────┐ + │ Target Python Process │ + │ (being profiled) │ + └──────────────┬──────────────┘ + │ Stack sampling at + │ configured interval + │ (e.g., 10000µs) + ▼ + ┌─────────────────────────────┐ + │ LiveStatsCollector │ + │ ┌───────────────────────┐ │ + │ │ collect() │ │ Aggregates samples + │ │ - Iterates frames │ │ into statistics + │ │ - Updates counters │ │ + │ └───────────┬───────────┘ │ + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Data Storage │ │ + │ │ - result dict │ │ Tracks per-function: + │ │ - direct_calls │ │ • Direct samples + │ │ - cumulative_calls │ │ • Cumulative samples + │ └───────────┬───────────┘ │ • Derived time stats + │ │ │ + │ ▼ │ + │ ┌───────────────────────┐ │ + │ │ Display Update │ │ + │ │ (10Hz by default) │ │ Rate-limited refresh + │ └───────────┬───────────┘ │ + └──────────────┼──────────────┘ + │ + ▼ + ┌─────────────────────────────┐ + │ DisplayInterface │ + │ (Abstract layer) │ + └──────────────┬──────────────┘ + ┌───────┴────────┐ + │ │ + ┌──────────▼────────┐ ┌───▼──────────┐ + │ CursesDisplay │ │ MockDisplay │ + │ - Real terminal │ │ - Testing │ + │ - ncurses backend │ │ - No UI │ + └─────────┬─────────┘ └──────────────┘ + │ + ▼ + ┌─────────────────────────────────────┐ + │ Widget-Based Rendering │ + │ ┌─────────────────────────────────┐ │ + │ │ HeaderWidget │ │ + │ │ • PID, uptime, time, interval │ │ + │ │ • Sample stats & progress bar │ │ + │ │ • Efficiency bar │ │ + │ │ • Thread status & GC stats │ │ + │ │ • Function summary │ │ + │ │ • Top 3 hottest functions │ │ + │ ├─────────────────────────────────┤ │ + │ │ TableWidget │ │ + │ │ • Column headers (sortable) │ │ Interactive display + │ │ • Stats rows (scrolling) │ │ with keyboard controls: + │ │ - nsamples % time │ │ s: sort, p: pause + │ │ - function file:line │ │ r: reset, /: filter + │ ├─────────────────────────────────┤ │ q: quit, h: help + │ │ FooterWidget │ │ + │ │ • Legend and status │ │ + │ │ • Filter input prompt │ │ + │ └─────────────────────────────────┘ │ + └─────────────────────────────────────┘ + +Architecture: + +The live collector is organized into four layers. The data collection layer +(LiveStatsCollector) aggregates stack samples into per-function statistics without +any knowledge of how they will be presented. The display abstraction layer +(DisplayInterface) defines rendering operations without coupling to curses or any +specific UI framework. The widget layer (Widget, HeaderWidget, TableWidget, +FooterWidget, HelpWidget, ProgressBarWidget) encapsulates individual UI components +with their own rendering logic, promoting modularity and reusability. The +presentation layer (CursesDisplay/MockDisplay) implements the actual rendering for +terminal output and testing. + +The system runs two independent update loops. The sampling loop is driven by the +profiler at the configured interval (e.g., 10000µs) and continuously collects +stack frames and updates statistics. The display loop runs at a fixed refresh rate +(default 10Hz) and updates the terminal independently of sampling frequency. This +separation allows high-frequency sampling without overwhelming the terminal with +constant redraws. + +Statistics are computed incrementally as samples arrive. The collector maintains +running counters (direct calls and cumulative calls) in a dictionary keyed by +function location. Derived metrics like time estimates and percentages are computed +on-demand during display updates rather than being stored, which minimizes memory +overhead as the number of tracked functions grows. + +User input is processed asynchronously during display updates using non-blocking I/O. +This allows interactive controls (sorting, filtering, pausing) without interrupting +the data collection pipeline. The collector maintains mode flags (paused, +filter_input_mode) that affect what gets displayed but not what gets collected. + +""" + +# Re-export all public classes and constants for backward compatibility +from .collector import LiveStatsCollector +from .display import DisplayInterface, CursesDisplay, MockDisplay +from .widgets import ( + Widget, + ProgressBarWidget, + HeaderWidget, + TableWidget, + FooterWidget, + HelpWidget, +) +from .constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_HZ, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + WIDTH_THRESHOLD_SAMPLE_PCT, + WIDTH_THRESHOLD_TOTTIME, + WIDTH_THRESHOLD_CUMUL_PCT, + WIDTH_THRESHOLD_CUMTIME, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + TOP_FUNCTIONS_DISPLAY_COUNT, + COL_WIDTH_NSAMPLES, + COL_SPACING, + COL_WIDTH_SAMPLE_PCT, + COL_WIDTH_TIME, + MIN_FUNC_NAME_WIDTH, + MAX_FUNC_NAME_WIDTH, + MIN_AVAILABLE_SPACE, + MIN_BAR_WIDTH, + MAX_SAMPLE_RATE_BAR_WIDTH, + MAX_EFFICIENCY_BAR_WIDTH, + MIN_SAMPLE_RATE_FOR_SCALING, + FINISHED_BANNER_EXTRA_LINES, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, +) + +__all__ = [ + # Main collector + "LiveStatsCollector", + # Display interfaces + "DisplayInterface", + "CursesDisplay", + "MockDisplay", + # Widgets + "Widget", + "ProgressBarWidget", + "HeaderWidget", + "TableWidget", + "FooterWidget", + "HelpWidget", + # Constants + "MICROSECONDS_PER_SECOND", + "DISPLAY_UPDATE_HZ", + "DISPLAY_UPDATE_INTERVAL", + "MIN_TERMINAL_WIDTH", + "MIN_TERMINAL_HEIGHT", + "WIDTH_THRESHOLD_SAMPLE_PCT", + "WIDTH_THRESHOLD_TOTTIME", + "WIDTH_THRESHOLD_CUMUL_PCT", + "WIDTH_THRESHOLD_CUMTIME", + "HEADER_LINES", + "FOOTER_LINES", + "SAFETY_MARGIN", + "TOP_FUNCTIONS_DISPLAY_COUNT", + "COL_WIDTH_NSAMPLES", + "COL_SPACING", + "COL_WIDTH_SAMPLE_PCT", + "COL_WIDTH_TIME", + "MIN_FUNC_NAME_WIDTH", + "MAX_FUNC_NAME_WIDTH", + "MIN_AVAILABLE_SPACE", + "MIN_BAR_WIDTH", + "MAX_SAMPLE_RATE_BAR_WIDTH", + "MAX_EFFICIENCY_BAR_WIDTH", + "MIN_SAMPLE_RATE_FOR_SCALING", + "FINISHED_BANNER_EXTRA_LINES", + "COLOR_PAIR_HEADER_BG", + "COLOR_PAIR_CYAN", + "COLOR_PAIR_YELLOW", + "COLOR_PAIR_GREEN", + "COLOR_PAIR_MAGENTA", + "COLOR_PAIR_RED", + "COLOR_PAIR_SORTED_HEADER", + "DEFAULT_SORT_BY", + "DEFAULT_DISPLAY_LIMIT", +] diff --git a/Lib/profiling/sampling/live_collector/collector.py b/Lib/profiling/sampling/live_collector/collector.py new file mode 100644 index 00000000000..7adbf1bbe7f --- /dev/null +++ b/Lib/profiling/sampling/live_collector/collector.py @@ -0,0 +1,1017 @@ +"""LiveStatsCollector - Main collector class for live profiling.""" + +import collections +import contextlib +import curses +from dataclasses import dataclass, field +import os +import site +import sys +import sysconfig +import time +import _colorize + +from ..collector import Collector +from ..constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_WALL, +) +from .constants import ( + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_INTERVAL, + MIN_TERMINAL_WIDTH, + MIN_TERMINAL_HEIGHT, + HEADER_LINES, + FOOTER_LINES, + SAFETY_MARGIN, + FINISHED_BANNER_EXTRA_LINES, + DEFAULT_SORT_BY, + DEFAULT_DISPLAY_LIMIT, + COLOR_PAIR_HEADER_BG, + COLOR_PAIR_CYAN, + COLOR_PAIR_YELLOW, + COLOR_PAIR_GREEN, + COLOR_PAIR_MAGENTA, + COLOR_PAIR_RED, + COLOR_PAIR_SORTED_HEADER, +) +from .display import CursesDisplay +from .widgets import HeaderWidget, TableWidget, FooterWidget, HelpWidget +from .trend_tracker import TrendTracker + + +@dataclass +class ThreadData: + """Encapsulates all profiling data for a single thread.""" + + thread_id: int + + # Function call statistics: {location: {direct_calls: int, cumulative_calls: int}} + result: dict = field(default_factory=lambda: collections.defaultdict( + lambda: dict(direct_calls=0, cumulative_calls=0) + )) + + # Thread status statistics + has_gil: int = 0 + on_cpu: int = 0 + gil_requested: int = 0 + unknown: int = 0 + total: int = 0 # Total status samples for this thread + + # Sample counts + sample_count: int = 0 + gc_frame_samples: int = 0 + + def increment_status_flag(self, status_flags): + """Update status counts based on status bit flags.""" + if status_flags & THREAD_STATUS_HAS_GIL: + self.has_gil += 1 + if status_flags & THREAD_STATUS_ON_CPU: + self.on_cpu += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + self.gil_requested += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + self.unknown += 1 + self.total += 1 + + def as_status_dict(self): + """Return status counts as a dict for compatibility.""" + return { + "has_gil": self.has_gil, + "on_cpu": self.on_cpu, + "gil_requested": self.gil_requested, + "unknown": self.unknown, + "total": self.total, + } + + +class LiveStatsCollector(Collector): + """Collector that displays live top-like statistics using ncurses.""" + + def __init__( + self, + sample_interval_usec, + *, + skip_idle=False, + sort_by=DEFAULT_SORT_BY, + limit=DEFAULT_DISPLAY_LIMIT, + pid=None, + display=None, + mode=None, + ): + """ + Initialize the live stats collector. + + Args: + sample_interval_usec: Sampling interval in microseconds + skip_idle: Whether to skip idle threads + sort_by: Sort key ('tottime', 'nsamples', 'cumtime', 'sample_pct', 'cumul_pct') + limit: Maximum number of functions to display + pid: Process ID being profiled + display: DisplayInterface implementation (None means curses will be used) + mode: Profiling mode ('cpu', 'gil', etc.) - affects what stats are shown + """ + self.result = collections.defaultdict( + lambda: dict(total_rec_calls=0, direct_calls=0, cumulative_calls=0) + ) + self.sample_interval_usec = sample_interval_usec + self.sample_interval_sec = ( + sample_interval_usec / MICROSECONDS_PER_SECOND + ) + self.skip_idle = skip_idle + self.sort_by = sort_by + self.limit = limit + self.total_samples = 0 + self.start_time = None + self.stdscr = None + self.display = display # DisplayInterface implementation + self.running = True + self.pid = pid + self.mode = mode # Profiling mode + self._saved_stdout = None + self._saved_stderr = None + self._devnull = None + self._last_display_update = None + self.max_sample_rate = 0 # Track maximum sample rate seen + self.successful_samples = 0 # Track samples that captured frames + self.failed_samples = 0 # Track samples that failed to capture frames + self.display_update_interval = DISPLAY_UPDATE_INTERVAL # Instance variable for display refresh rate + + # Thread status statistics (bit flags) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, # Total thread count across all samples + } + self.gc_frame_samples = 0 # Track samples with GC frames + + # Interactive controls state + self.paused = False # Pause UI updates (profiling continues) + self.show_help = False # Show help screen + self.filter_pattern = None # Glob pattern to filter functions + self.filter_input_mode = False # Currently entering filter text + self.filter_input_buffer = "" # Buffer for filter input + self.finished = False # Program has finished, showing final state + self.finish_timestamp = None # When profiling finished (for time freezing) + self.finish_wall_time = None # Wall clock time when profiling finished + + # Thread tracking state + self.thread_ids = [] # List of thread IDs seen + self.view_mode = "ALL" # "ALL" or "PER_THREAD" + self.current_thread_index = ( + 0 # Index into thread_ids when in PER_THREAD mode + ) + self.per_thread_data = {} # {thread_id: ThreadData} + + # Calculate common path prefixes to strip + self._path_prefixes = self._get_common_path_prefixes() + + # Widgets (initialized when display is available) + self.header_widget = None + self.table_widget = None + self.footer_widget = None + self.help_widget = None + + # Color mode + self._can_colorize = _colorize.can_colorize() + + # Trend tracking (initialized after colors are set up) + self._trend_tracker = None + + @property + def elapsed_time(self): + """Get the elapsed time, frozen when finished.""" + if self.finished and self.finish_timestamp is not None: + return self.finish_timestamp - self.start_time + return time.perf_counter() - self.start_time if self.start_time else 0 + + @property + def current_time_display(self): + """Get the current time for display, frozen when finished.""" + if self.finished and self.finish_wall_time is not None: + return time.strftime("%H:%M:%S", time.localtime(self.finish_wall_time)) + return time.strftime("%H:%M:%S") + + def _get_or_create_thread_data(self, thread_id): + """Get or create ThreadData for a thread ID.""" + if thread_id not in self.per_thread_data: + self.per_thread_data[thread_id] = ThreadData(thread_id=thread_id) + return self.per_thread_data[thread_id] + + def _get_current_thread_data(self): + """Get ThreadData for currently selected thread in PER_THREAD mode.""" + if self.view_mode == "PER_THREAD" and self.current_thread_index < len(self.thread_ids): + thread_id = self.thread_ids[self.current_thread_index] + return self.per_thread_data.get(thread_id) + return None + + def _get_current_result_source(self): + """Get result dict for current view mode (aggregated or per-thread).""" + if self.view_mode == "ALL": + return self.result + thread_data = self._get_current_thread_data() + return thread_data.result if thread_data else {} + + def _get_common_path_prefixes(self): + """Get common path prefixes to strip from file paths.""" + prefixes = [] + + # Get the actual stdlib location from the os module + # This works for both installed Python and development builds + os_module_file = os.__file__ + if os_module_file: + # os.__file__ points to os.py, get its directory + stdlib_dir = os.path.dirname(os.path.abspath(os_module_file)) + prefixes.append(stdlib_dir) + + # Get stdlib location from sysconfig (may be different or same) + stdlib_path = sysconfig.get_path("stdlib") + if stdlib_path: + prefixes.append(stdlib_path) + + # Get platstdlib location (platform-specific stdlib) + platstdlib_path = sysconfig.get_path("platstdlib") + if platstdlib_path: + prefixes.append(platstdlib_path) + + # Get site-packages locations + for site_path in site.getsitepackages(): + prefixes.append(site_path) + + # Also check user site-packages + user_site = site.getusersitepackages() + if user_site: + prefixes.append(user_site) + + # Remove duplicates and sort by length (longest first) to match most specific paths first + prefixes = list(set(prefixes)) + prefixes.sort(key=lambda x: len(x), reverse=True) + + return prefixes + + def simplify_path(self, filepath): + """Simplify a file path by removing common prefixes.""" + # Try to match against known prefixes + for prefix_path in self._path_prefixes: + if filepath.startswith(prefix_path): + # Remove the prefix completely + relative = filepath[len(prefix_path) :].lstrip(os.sep) + return relative + + # If no match, return the original path + return filepath + + def process_frames(self, frames, thread_id=None): + """Process a single thread's frame stack. + + Args: + frames: List of frame information + thread_id: Thread ID for per-thread tracking (optional) + """ + if not frames: + return + + # Get per-thread data if tracking per-thread + thread_data = self._get_or_create_thread_data(thread_id) if thread_id is not None else None + + # Process each frame in the stack to track cumulative calls + for frame in frames: + location = (frame.filename, frame.lineno, frame.funcname) + self.result[location]["cumulative_calls"] += 1 + if thread_data: + thread_data.result[location]["cumulative_calls"] += 1 + + # The top frame gets counted as an inline call (directly executing) + top_location = (frames[0].filename, frames[0].lineno, frames[0].funcname) + self.result[top_location]["direct_calls"] += 1 + if thread_data: + thread_data.result[top_location]["direct_calls"] += 1 + + def collect_failed_sample(self): + self.failed_samples += 1 + self.total_samples += 1 + + def collect(self, stack_frames): + """Collect and display profiling data.""" + if self.start_time is None: + self.start_time = time.perf_counter() + self._last_display_update = self.start_time + + # Thread status counts for this sample + temp_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + has_gc_frame = False + + # Always collect data, even when paused + # Track thread status flags and GC frames + for interpreter_info in stack_frames: + threads = getattr(interpreter_info, "threads", []) + for thread_info in threads: + temp_status_counts["total"] += 1 + + # Track thread status using bit flags + status_flags = getattr(thread_info, "status", 0) + thread_id = getattr(thread_info, "thread_id", None) + + # Update aggregated counts + if status_flags & THREAD_STATUS_HAS_GIL: + temp_status_counts["has_gil"] += 1 + if status_flags & THREAD_STATUS_ON_CPU: + temp_status_counts["on_cpu"] += 1 + if status_flags & THREAD_STATUS_GIL_REQUESTED: + temp_status_counts["gil_requested"] += 1 + if status_flags & THREAD_STATUS_UNKNOWN: + temp_status_counts["unknown"] += 1 + + # Update per-thread status counts + if thread_id is not None: + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.increment_status_flag(status_flags) + + # Process frames (respecting skip_idle) + if self.skip_idle: + has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL) + on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU) + if not (has_gil or on_cpu): + continue + + frames = getattr(thread_info, "frame_info", None) + if frames: + self.process_frames(frames, thread_id=thread_id) + + # Track thread IDs only for threads that actually have samples + if ( + thread_id is not None + and thread_id not in self.thread_ids + ): + self.thread_ids.append(thread_id) + + # Increment per-thread sample count and check for GC frames + thread_has_gc_frame = False + for frame in frames: + funcname = getattr(frame, "funcname", "") + if "" in funcname or "gc_collect" in funcname: + has_gc_frame = True + thread_has_gc_frame = True + break + + if thread_id is not None: + thread_data = self._get_or_create_thread_data(thread_id) + thread_data.sample_count += 1 + if thread_has_gc_frame: + thread_data.gc_frame_samples += 1 + + # Update cumulative thread status counts + for key, count in temp_status_counts.items(): + self.thread_status_counts[key] += count + + if has_gc_frame: + self.gc_frame_samples += 1 + + self.successful_samples += 1 + self.total_samples += 1 + + # Handle input on every sample for instant responsiveness + if self.display is not None: + self._handle_input() + + # Update display at configured rate if display is initialized and not paused + if self.display is not None and not self.paused: + current_time = time.perf_counter() + if ( + self._last_display_update is None + or (current_time - self._last_display_update) + >= self.display_update_interval + ): + self._update_display() + self._last_display_update = current_time + + def _prepare_display_data(self, height): + """Prepare data for display rendering.""" + elapsed = self.elapsed_time + stats_list = self.build_stats_list() + + # Calculate available space for stats + # Add extra lines for finished banner when in finished state + extra_header_lines = ( + FINISHED_BANNER_EXTRA_LINES if self.finished else 0 + ) + max_stats_lines = max( + 0, + height + - HEADER_LINES + - extra_header_lines + - FOOTER_LINES + - SAFETY_MARGIN, + ) + stats_list = stats_list[:max_stats_lines] + + return elapsed, stats_list + + def _initialize_widgets(self, colors): + """Initialize widgets with display and colors.""" + if self.header_widget is None: + # Initialize trend tracker with colors + if self._trend_tracker is None: + self._trend_tracker = TrendTracker(colors, enabled=True) + + self.header_widget = HeaderWidget(self.display, colors, self) + self.table_widget = TableWidget(self.display, colors, self) + self.footer_widget = FooterWidget(self.display, colors, self) + self.help_widget = HelpWidget(self.display, colors) + + def _render_display_sections( + self, height, width, elapsed, stats_list, colors + ): + """Render all display sections to the screen.""" + line = 0 + try: + # Initialize widgets if not already done + self._initialize_widgets(colors) + + # Render header + line = self.header_widget.render( + line, width, elapsed=elapsed, stats_list=stats_list + ) + + # Render table + line = self.table_widget.render( + line, width, height=height, stats_list=stats_list + ) + + except curses.error: + pass + + def _update_display(self): + """Update the display with current stats.""" + try: + # Clear screen and get dimensions + self.display.clear() + height, width = self.display.get_dimensions() + + # Check terminal size + if width < MIN_TERMINAL_WIDTH or height < MIN_TERMINAL_HEIGHT: + self._show_terminal_too_small(height, width) + self.display.refresh() + return + + # Setup colors and initialize widgets (needed for both help and normal display) + colors = self._setup_colors() + self._initialize_widgets(colors) + + # Show help screen if requested + if self.show_help: + self.help_widget.render(0, width, height=height) + self.display.refresh() + return + + # Prepare data + elapsed, stats_list = self._prepare_display_data(height) + + # Render all sections + self._render_display_sections( + height, width, elapsed, stats_list, colors + ) + + # Footer + self.footer_widget.render(height - 2, width) + + # Show filter input prompt if in filter input mode + if self.filter_input_mode: + self.footer_widget.render_filter_input_prompt( + height - 1, width + ) + + # Refresh display + self.display.redraw() + self.display.refresh() + + except Exception: + pass + + def _cycle_sort(self, reverse=False): + """Cycle through different sort modes in column order. + + Args: + reverse: If True, cycle backwards (right to left), otherwise forward (left to right) + """ + sort_modes = [ + "nsamples", + "sample_pct", + "tottime", + "cumul_pct", + "cumtime", + ] + try: + current_idx = sort_modes.index(self.sort_by) + if reverse: + self.sort_by = sort_modes[(current_idx - 1) % len(sort_modes)] + else: + self.sort_by = sort_modes[(current_idx + 1) % len(sort_modes)] + except ValueError: + self.sort_by = "nsamples" + + def _setup_colors(self): + """Set up color pairs and return color attributes.""" + + A_BOLD = self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + A_UNDERLINE = self.display.get_attr("A_UNDERLINE") + A_NORMAL = self.display.get_attr("A_NORMAL") + + # Check both curses color support and _colorize.can_colorize() + if self.display.has_colors() and self._can_colorize: + with contextlib.suppress(Exception): + # Color constants (using curses values for compatibility) + COLOR_CYAN = 6 + COLOR_GREEN = 2 + COLOR_YELLOW = 3 + COLOR_BLACK = 0 + COLOR_MAGENTA = 5 + COLOR_RED = 1 + + # Initialize all color pairs used throughout the UI + self.display.init_color_pair( + 1, COLOR_CYAN, -1 + ) # Data colors for stats rows + self.display.init_color_pair(2, COLOR_GREEN, -1) + self.display.init_color_pair(3, COLOR_YELLOW, -1) + self.display.init_color_pair( + COLOR_PAIR_HEADER_BG, COLOR_BLACK, COLOR_GREEN + ) + self.display.init_color_pair( + COLOR_PAIR_CYAN, COLOR_CYAN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_YELLOW, COLOR_YELLOW, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_GREEN, COLOR_GREEN, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_MAGENTA, COLOR_MAGENTA, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_RED, COLOR_RED, COLOR_BLACK + ) + self.display.init_color_pair( + COLOR_PAIR_SORTED_HEADER, COLOR_BLACK, COLOR_YELLOW + ) + + return { + "header": self.display.get_color_pair(COLOR_PAIR_HEADER_BG) + | A_BOLD, + "cyan": self.display.get_color_pair(COLOR_PAIR_CYAN) + | A_BOLD, + "yellow": self.display.get_color_pair(COLOR_PAIR_YELLOW) + | A_BOLD, + "green": self.display.get_color_pair(COLOR_PAIR_GREEN) + | A_BOLD, + "magenta": self.display.get_color_pair(COLOR_PAIR_MAGENTA) + | A_BOLD, + "red": self.display.get_color_pair(COLOR_PAIR_RED) + | A_BOLD, + "sorted_header": self.display.get_color_pair( + COLOR_PAIR_SORTED_HEADER + ) + | A_BOLD, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": self.display.get_color_pair(1), + "color_file": self.display.get_color_pair(2), + "color_func": self.display.get_color_pair(3), + # Trend colors (stock-like indicators) + "trend_up": self.display.get_color_pair(COLOR_PAIR_GREEN) | A_BOLD, + "trend_down": self.display.get_color_pair(COLOR_PAIR_RED) | A_BOLD, + "trend_stable": A_NORMAL, + } + + # Fallback to non-color attributes + return { + "header": A_REVERSE | A_BOLD, + "cyan": A_BOLD, + "yellow": A_BOLD, + "green": A_BOLD, + "magenta": A_BOLD, + "red": A_BOLD, + "sorted_header": A_REVERSE | A_BOLD | A_UNDERLINE, + "normal_header": A_REVERSE | A_BOLD, + "color_samples": A_NORMAL, + "color_file": A_NORMAL, + "color_func": A_NORMAL, + # Trend colors (fallback to bold/normal for monochrome) + "trend_up": A_BOLD, + "trend_down": A_BOLD, + "trend_stable": A_NORMAL, + } + + def build_stats_list(self): + """Build and sort the statistics list.""" + stats_list = [] + result_source = self._get_current_result_source() + + for func, call_counts in result_source.items(): + # Apply filter if set (using substring matching) + if self.filter_pattern: + filename, lineno, funcname = func + # Simple substring match (case-insensitive) + pattern_lower = self.filter_pattern.lower() + filename_lower = filename.lower() + funcname_lower = funcname.lower() + + # Match if pattern is substring of filename, funcname, or combined + matched = ( + pattern_lower in filename_lower + or pattern_lower in funcname_lower + or pattern_lower in f"{filename_lower}:{funcname_lower}" + ) + if not matched: + continue + + direct_calls = call_counts.get("direct_calls", 0) + cumulative_calls = call_counts.get("cumulative_calls", 0) + total_time = direct_calls * self.sample_interval_sec + cumulative_time = cumulative_calls * self.sample_interval_sec + + # Calculate sample percentages + sample_pct = (direct_calls / self.total_samples * 100) if self.total_samples > 0 else 0 + cumul_pct = (cumulative_calls / self.total_samples * 100) if self.total_samples > 0 else 0 + + # Calculate trends for all columns using TrendTracker + trends = {} + if self._trend_tracker is not None: + trends = self._trend_tracker.update_metrics( + func, + { + 'nsamples': direct_calls, + 'tottime': total_time, + 'cumtime': cumulative_time, + 'sample_pct': sample_pct, + 'cumul_pct': cumul_pct, + } + ) + + stats_list.append( + { + "func": func, + "direct_calls": direct_calls, + "cumulative_calls": cumulative_calls, + "total_time": total_time, + "cumulative_time": cumulative_time, + "trends": trends, # Dictionary of trends for all columns + } + ) + + # Sort the stats + if self.sort_by == "nsamples": + stats_list.sort(key=lambda x: x["direct_calls"], reverse=True) + elif self.sort_by == "tottime": + stats_list.sort(key=lambda x: x["total_time"], reverse=True) + elif self.sort_by == "cumtime": + stats_list.sort(key=lambda x: x["cumulative_time"], reverse=True) + elif self.sort_by == "sample_pct": + stats_list.sort( + key=lambda x: (x["direct_calls"] / self.total_samples * 100) + if self.total_samples > 0 + else 0, + reverse=True, + ) + elif self.sort_by == "cumul_pct": + stats_list.sort( + key=lambda x: ( + x["cumulative_calls"] / self.total_samples * 100 + ) + if self.total_samples > 0 + else 0, + reverse=True, + ) + + return stats_list + + def reset_stats(self): + """Reset all collected statistics.""" + self.result.clear() + self.per_thread_data.clear() + self.thread_ids.clear() + self.view_mode = "ALL" + self.current_thread_index = 0 + self.total_samples = 0 + self.successful_samples = 0 + self.failed_samples = 0 + self.max_sample_rate = 0 + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + self.gc_frame_samples = 0 + # Clear trend tracking + if self._trend_tracker is not None: + self._trend_tracker.clear() + # Reset finished state and finish timestamp + self.finished = False + self.finish_timestamp = None + self.finish_wall_time = None + self.start_time = time.perf_counter() + self._last_display_update = self.start_time + + def mark_finished(self): + """Mark the profiling session as finished.""" + self.finished = True + # Capture the finish timestamp to freeze all timing displays + self.finish_timestamp = time.perf_counter() + self.finish_wall_time = time.time() # Wall clock time for display + # Force a final display update to show the finished message + if self.display is not None: + self._update_display() + + def _handle_finished_input_update(self, had_input): + """Update display after input when program is finished.""" + if self.finished and had_input and self.display is not None: + self._update_display() + + def _show_terminal_too_small(self, height, width): + """Display a message when terminal is too small.""" + A_BOLD = self.display.get_attr("A_BOLD") + msg1 = "Terminal too small!" + msg2 = f"Need: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}" + msg3 = f"Have: {width}x{height}" + msg4 = "Please resize" + + # Center the messages + if height >= 4: + self.display.add_str( + height // 2 - 2, + max(0, (width - len(msg1)) // 2), + msg1[: width - 1], + A_BOLD, + ) + self.display.add_str( + height // 2 - 1, + max(0, (width - len(msg2)) // 2), + msg2[: width - 1], + ) + self.display.add_str( + height // 2, + max(0, (width - len(msg3)) // 2), + msg3[: width - 1], + ) + self.display.add_str( + height // 2 + 1, + max(0, (width - len(msg4)) // 2), + msg4[: width - 1], + ) + elif height >= 1: + self.display.add_str(0, 0, msg1[: width - 1], A_BOLD) + + def _show_terminal_size_warning_and_wait(self, height, width): + """Show terminal size warning during initialization and wait for user acknowledgment.""" + A_BOLD = self.display.get_attr("A_BOLD") + A_DIM = self.display.get_attr("A_DIM") + + self.display.clear() + msg1 = "WARNING: Terminal too small!" + msg2 = f"Required: {MIN_TERMINAL_WIDTH}x{MIN_TERMINAL_HEIGHT}" + msg3 = f"Current: {width}x{height}" + msg4 = "Please resize your terminal for best experience" + msg5 = "Press any key to continue..." + + # Center the messages + if height >= 5: + self.display.add_str( + height // 2 - 2, + max(0, (width - len(msg1)) // 2), + msg1[: width - 1], + A_BOLD, + ) + self.display.add_str( + height // 2 - 1, + max(0, (width - len(msg2)) // 2), + msg2[: width - 1], + ) + self.display.add_str( + height // 2, + max(0, (width - len(msg3)) // 2), + msg3[: width - 1], + ) + self.display.add_str( + height // 2 + 1, + max(0, (width - len(msg4)) // 2), + msg4[: width - 1], + ) + self.display.add_str( + height // 2 + 3, + max(0, (width - len(msg5)) // 2), + msg5[: width - 1], + A_DIM, + ) + elif height >= 1: + self.display.add_str(0, 0, msg1[: width - 1], A_BOLD) + + self.display.refresh() + # Wait for user acknowledgment (2 seconds timeout) + self.display.set_nodelay(False) + # Note: timeout is curses-specific, skipping for now + self.display.get_input() + self.display.set_nodelay(True) + + def _handle_input(self): + """Handle keyboard input (non-blocking).""" + from . import constants + + self.display.set_nodelay(True) + ch = self.display.get_input() + + # Handle filter input mode FIRST - takes precedence over all commands + if self.filter_input_mode: + if ch == 27: # ESC key + self.filter_input_mode = False + self.filter_input_buffer = "" + elif ch == 10 or ch == 13: # Enter key + self.filter_pattern = ( + self.filter_input_buffer + if self.filter_input_buffer + else None + ) + self.filter_input_mode = False + self.filter_input_buffer = "" + elif ch == 127 or ch == 263: # Backspace + if self.filter_input_buffer: + self.filter_input_buffer = self.filter_input_buffer[:-1] + elif ch >= 32 and ch < 127: # Printable characters + self.filter_input_buffer += chr(ch) + + # Update display if input was processed while finished + self._handle_finished_input_update(ch != -1) + return + + # Handle help toggle keys + if ch == ord("h") or ch == ord("H") or ch == ord("?"): + self.show_help = not self.show_help + return + + # If showing help, any other key closes it + if self.show_help and ch != -1: + self.show_help = False + return + + # Handle regular commands + if ch == ord("q") or ch == ord("Q"): + self.running = False + + elif ch == ord("s"): + self._cycle_sort(reverse=False) + + elif ch == ord("S"): + self._cycle_sort(reverse=True) + + elif ch == ord("p") or ch == ord("P"): + self.paused = not self.paused + + elif ch == ord("r") or ch == ord("R"): + # Don't allow reset when profiling is finished + if not self.finished: + self.reset_stats() + + elif ch == ord("+") or ch == ord("="): + # Decrease update interval (faster refresh) + self.display_update_interval = max( + 0.05, self.display_update_interval - 0.05 + ) # Min 20Hz + + elif ch == ord("-") or ch == ord("_"): + # Increase update interval (slower refresh) + self.display_update_interval = min( + 1.0, self.display_update_interval + 0.05 + ) # Max 1Hz + + elif ch == ord("c") or ch == ord("C"): + if self.filter_pattern: + self.filter_pattern = None + + elif ch == ord("/"): + self.filter_input_mode = True + self.filter_input_buffer = self.filter_pattern or "" + + elif ch == ord("t") or ch == ord("T"): + # Toggle between ALL and PER_THREAD modes + if self.view_mode == "ALL": + if len(self.thread_ids) > 0: + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.view_mode = "ALL" + + elif ch == ord("x") or ch == ord("X"): + # Toggle trend colors on/off + if self._trend_tracker is not None: + self._trend_tracker.toggle() + + elif ch == curses.KEY_LEFT or ch == curses.KEY_UP: + # Navigate to previous thread in PER_THREAD mode, or switch from ALL to PER_THREAD + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index - 1 + ) % len(self.thread_ids) + + elif ch == curses.KEY_RIGHT or ch == curses.KEY_DOWN: + # Navigate to next thread in PER_THREAD mode, or switch from ALL to PER_THREAD + if len(self.thread_ids) > 0: + if self.view_mode == "ALL": + self.view_mode = "PER_THREAD" + self.current_thread_index = 0 + else: + self.current_thread_index = ( + self.current_thread_index + 1 + ) % len(self.thread_ids) + + # Update display if input was processed while finished + self._handle_finished_input_update(ch != -1) + + def init_curses(self, stdscr): + """Initialize curses display and suppress stdout/stderr.""" + self.stdscr = stdscr + self.display = CursesDisplay(stdscr) + + # Check terminal size upfront and warn if too small + height, width = self.display.get_dimensions() + + if width < MIN_TERMINAL_WIDTH or height < MIN_TERMINAL_HEIGHT: + # Show warning and wait briefly for user to see it + self._show_terminal_size_warning_and_wait(height, width) + + curses.curs_set(0) # Hide cursor + stdscr.nodelay(True) # Non-blocking input + stdscr.scrollok(False) # Disable scrolling + stdscr.idlok(False) # Disable hardware insert/delete + stdscr.leaveok(True) # Don't care about cursor position + + if curses.has_colors(): + curses.start_color() + curses.use_default_colors() + + # Suppress stdout and stderr to prevent interfering with curses display + # Use contextlib.redirect_stdout/stderr for better resource management + self._saved_stdout = sys.stdout + self._saved_stderr = sys.stderr + # Open devnull and ensure it's cleaned up even if an exception occurs + try: + self._devnull = open(os.devnull, "w") + sys.stdout = self._devnull + sys.stderr = self._devnull + except Exception: + # If redirection fails, restore original streams + sys.stdout = self._saved_stdout + sys.stderr = self._saved_stderr + raise + + # Initial clear + self.display.clear() + self.display.refresh() + + def cleanup_curses(self): + """Clean up curses display and restore stdout/stderr.""" + # Restore stdout and stderr in reverse order + # Use try-finally to ensure cleanup even if restoration fails + try: + if self._saved_stdout is not None: + sys.stdout = self._saved_stdout + self._saved_stdout = None + if self._saved_stderr is not None: + sys.stderr = self._saved_stderr + self._saved_stderr = None + finally: + # Always close devnull, even if stdout/stderr restoration fails + if self._devnull is not None: + with contextlib.suppress(Exception): + self._devnull.close() + self._devnull = None + + if self.display is not None and self.stdscr is not None: + with contextlib.suppress(Exception): + curses.curs_set(1) # Show cursor + self.display.set_nodelay(False) + + def export(self, filename): + """Export is not supported in live mode.""" + raise NotImplementedError( + "Export to file is not supported in live mode. " + "Use the live TUI to view statistics in real-time." + ) diff --git a/Lib/profiling/sampling/live_collector/constants.py b/Lib/profiling/sampling/live_collector/constants.py new file mode 100644 index 00000000000..e4690c90baf --- /dev/null +++ b/Lib/profiling/sampling/live_collector/constants.py @@ -0,0 +1,59 @@ +"""Constants for the live profiling collector.""" + +# Time conversion constants +MICROSECONDS_PER_SECOND = 1_000_000 + +# Display update constants +DISPLAY_UPDATE_HZ = 10 +DISPLAY_UPDATE_INTERVAL = 1.0 / DISPLAY_UPDATE_HZ # 0.1 seconds + +# Terminal size constraints +MIN_TERMINAL_WIDTH = 60 +MIN_TERMINAL_HEIGHT = 12 + +# Column width thresholds +WIDTH_THRESHOLD_SAMPLE_PCT = 80 +WIDTH_THRESHOLD_TOTTIME = 100 +WIDTH_THRESHOLD_CUMUL_PCT = 120 +WIDTH_THRESHOLD_CUMTIME = 140 + +# Display layout constants +HEADER_LINES = 10 # Increased to include thread status line +FOOTER_LINES = 2 +SAFETY_MARGIN = 1 +TOP_FUNCTIONS_DISPLAY_COUNT = 3 + +# Column widths for data display +COL_WIDTH_NSAMPLES = 13 +COL_SPACING = 2 +COL_WIDTH_SAMPLE_PCT = 5 +COL_WIDTH_TIME = 10 + +# Function name display +MIN_FUNC_NAME_WIDTH = 10 +MAX_FUNC_NAME_WIDTH = 40 +MIN_AVAILABLE_SPACE = 10 + +# Progress bar display +MIN_BAR_WIDTH = 10 +MAX_SAMPLE_RATE_BAR_WIDTH = 30 +MAX_EFFICIENCY_BAR_WIDTH = 60 + +# Sample rate scaling +MIN_SAMPLE_RATE_FOR_SCALING = 100 + +# Finished banner display +FINISHED_BANNER_EXTRA_LINES = 3 # Blank line + banner + blank line + +# Color pair IDs +COLOR_PAIR_HEADER_BG = 4 +COLOR_PAIR_CYAN = 5 +COLOR_PAIR_YELLOW = 6 +COLOR_PAIR_GREEN = 7 +COLOR_PAIR_MAGENTA = 8 +COLOR_PAIR_RED = 9 +COLOR_PAIR_SORTED_HEADER = 10 + +# Default display settings +DEFAULT_SORT_BY = "nsamples" # Number of samples in leaf (self time) +DEFAULT_DISPLAY_LIMIT = 20 diff --git a/Lib/profiling/sampling/live_collector/display.py b/Lib/profiling/sampling/live_collector/display.py new file mode 100644 index 00000000000..d7f65ad73fd --- /dev/null +++ b/Lib/profiling/sampling/live_collector/display.py @@ -0,0 +1,236 @@ +"""Display interface abstractions for the live profiling collector.""" + +import contextlib +import curses +from abc import ABC, abstractmethod + + +class DisplayInterface(ABC): + """Abstract interface for display operations to enable testing.""" + + @abstractmethod + def get_dimensions(self): + """Get terminal dimensions as (height, width).""" + pass + + @abstractmethod + def clear(self): + """Clear the screen.""" + pass + + @abstractmethod + def refresh(self): + """Refresh the screen to show changes.""" + pass + + @abstractmethod + def redraw(self): + """Redraw the entire window.""" + pass + + @abstractmethod + def add_str(self, line, col, text, attr=0): + """Add a string at the specified position.""" + pass + + @abstractmethod + def get_input(self): + """Get a character from input (non-blocking). Returns -1 if no input.""" + pass + + @abstractmethod + def set_nodelay(self, flag): + """Set non-blocking mode for input.""" + pass + + @abstractmethod + def has_colors(self): + """Check if terminal supports colors.""" + pass + + @abstractmethod + def init_color_pair(self, pair_id, fg, bg): + """Initialize a color pair.""" + pass + + @abstractmethod + def get_color_pair(self, pair_id): + """Get a color pair attribute.""" + pass + + @abstractmethod + def get_attr(self, name): + """Get a display attribute by name (e.g., 'A_BOLD', 'A_REVERSE').""" + pass + + +class CursesDisplay(DisplayInterface): + """Real curses display implementation.""" + + def __init__(self, stdscr): + self.stdscr = stdscr + + def get_dimensions(self): + return self.stdscr.getmaxyx() + + def clear(self): + self.stdscr.clear() + + def refresh(self): + self.stdscr.refresh() + + def redraw(self): + self.stdscr.redrawwin() + + def add_str(self, line, col, text, attr=0): + try: + height, width = self.get_dimensions() + if 0 <= line < height and 0 <= col < width: + max_len = width - col - 1 + if len(text) > max_len: + text = text[:max_len] + self.stdscr.addstr(line, col, text, attr) + except curses.error: + pass + + def get_input(self): + try: + return self.stdscr.getch() + except (KeyError, curses.error): + return -1 + + def set_nodelay(self, flag): + self.stdscr.nodelay(flag) + + def has_colors(self): + return curses.has_colors() + + def init_color_pair(self, pair_id, fg, bg): + try: + curses.init_pair(pair_id, fg, bg) + except curses.error: + pass + + def get_color_pair(self, pair_id): + return curses.color_pair(pair_id) + + def get_attr(self, name): + return getattr(curses, name, 0) + + +class MockDisplay(DisplayInterface): + """Mock display for testing.""" + + def __init__(self, height=40, width=160): + self.height = height + self.width = width + self.buffer = {} + self.cleared = False + self.refreshed = False + self.redrawn = False + self.input_queue = [] + self.nodelay_flag = True + self.colors_supported = True + self.color_pairs = {} + + def get_dimensions(self): + return (self.height, self.width) + + def clear(self): + self.buffer.clear() + self.cleared = True + + def refresh(self): + self.refreshed = True + + def redraw(self): + self.redrawn = True + + def add_str(self, line, col, text, attr=0): + if 0 <= line < self.height and 0 <= col < self.width: + max_len = self.width - col - 1 + if len(text) > max_len: + text = text[:max_len] + self.buffer[(line, col)] = (text, attr) + + def get_input(self): + if self.input_queue: + return self.input_queue.pop(0) + return -1 + + def set_nodelay(self, flag): + self.nodelay_flag = flag + + def has_colors(self): + return self.colors_supported + + def init_color_pair(self, pair_id, fg, bg): + self.color_pairs[pair_id] = (fg, bg) + + def get_color_pair(self, pair_id): + return pair_id << 8 + + def get_attr(self, name): + attrs = { + "A_NORMAL": 0, + "A_BOLD": 1 << 16, + "A_REVERSE": 1 << 17, + "A_UNDERLINE": 1 << 18, + "A_DIM": 1 << 19, + } + return attrs.get(name, 0) + + def simulate_input(self, char): + """Helper method for tests to simulate keyboard input.""" + self.input_queue.append(char) + + def get_text_at(self, line, col): + """Helper method for tests to inspect buffer content.""" + if (line, col) in self.buffer: + return self.buffer[(line, col)][0] + return None + + def get_all_lines(self): + """Get all display content as a list of lines (for testing).""" + if not self.buffer: + return [] + + max_line = max(pos[0] for pos in self.buffer.keys()) + lines = [] + for line_num in range(max_line + 1): + line_parts = [] + for col in range(self.width): + if (line_num, col) in self.buffer: + text, _ = self.buffer[(line_num, col)] + line_parts.append((col, text)) + + # Reconstruct line from parts + if line_parts: + line_parts.sort(key=lambda x: x[0]) + line = "" + last_col = 0 + for col, text in line_parts: + if col > last_col: + line += " " * (col - last_col) + line += text + last_col = col + len(text) + lines.append(line.rstrip()) + else: + lines.append("") + + # Remove trailing empty lines + while lines and not lines[-1]: + lines.pop() + + return lines + + def find_text(self, pattern): + """Find text matching pattern in buffer (for testing). Returns (line, col) or None.""" + for (line, col), (text, _) in self.buffer.items(): + if pattern in text: + return (line, col) + return None + + def contains_text(self, text): + """Check if display contains the given text anywhere (for testing).""" + return self.find_text(text) is not None diff --git a/Lib/profiling/sampling/live_collector/trend_tracker.py b/Lib/profiling/sampling/live_collector/trend_tracker.py new file mode 100644 index 00000000000..c025b83a134 --- /dev/null +++ b/Lib/profiling/sampling/live_collector/trend_tracker.py @@ -0,0 +1,157 @@ +"""TrendTracker - Encapsulated trend tracking for live profiling metrics. + +This module provides trend tracking functionality for profiling metrics, +calculating direction indicators (up/down/stable) and managing associated +visual attributes like colors. +""" + +import curses +from typing import Dict, Literal, Any + +TrendDirection = Literal["up", "down", "stable"] + + +class TrendTracker: + """ + Tracks metric trends over time and provides visual indicators. + + This class encapsulates all logic for: + - Tracking previous values of metrics + - Calculating trend directions (up/down/stable) + - Determining visual attributes (colors) for trends + - Managing enable/disable state + + Example: + tracker = TrendTracker(colors_dict) + tracker.update("func1", "nsamples", 10) + trend = tracker.get_trend("func1", "nsamples") + color = tracker.get_color("func1", "nsamples") + """ + + # Threshold for determining if a value has changed significantly + CHANGE_THRESHOLD = 0.001 + + def __init__(self, colors: Dict[str, int], enabled: bool = True): + """ + Initialize the trend tracker. + + Args: + colors: Dictionary containing color attributes including + 'trend_up', 'trend_down', 'trend_stable' + enabled: Whether trend tracking is initially enabled + """ + self._previous_values: Dict[Any, Dict[str, float]] = {} + self._enabled = enabled + self._colors = colors + + @property + def enabled(self) -> bool: + """Whether trend tracking is enabled.""" + return self._enabled + + def toggle(self) -> bool: + """ + Toggle trend tracking on/off. + + Returns: + New enabled state + """ + self._enabled = not self._enabled + return self._enabled + + def set_enabled(self, enabled: bool) -> None: + """Set trend tracking enabled state.""" + self._enabled = enabled + + def update(self, key: Any, metric: str, value: float) -> TrendDirection: + """ + Update a metric value and calculate its trend. + + Args: + key: Identifier for the entity (e.g., function) + metric: Name of the metric (e.g., 'nsamples', 'tottime') + value: Current value of the metric + + Returns: + Trend direction: 'up', 'down', or 'stable' + """ + # Initialize storage for this key if needed + if key not in self._previous_values: + self._previous_values[key] = {} + + # Get previous value, defaulting to current if not tracked yet + prev_value = self._previous_values[key].get(metric, value) + + # Calculate trend + if value > prev_value + self.CHANGE_THRESHOLD: + trend = "up" + elif value < prev_value - self.CHANGE_THRESHOLD: + trend = "down" + else: + trend = "stable" + + # Update previous value for next iteration + self._previous_values[key][metric] = value + + return trend + + def get_trend(self, key: Any, metric: str) -> TrendDirection: + """ + Get the current trend for a metric without updating. + + Args: + key: Identifier for the entity + metric: Name of the metric + + Returns: + Trend direction, or 'stable' if not tracked + """ + # This would require storing trends separately, which we don't do + # For now, return stable if not found + return "stable" + + def get_color(self, trend: TrendDirection) -> int: + """ + Get the color attribute for a trend direction. + + Args: + trend: The trend direction + + Returns: + Curses color attribute (or A_NORMAL if disabled) + """ + if not self._enabled: + return curses.A_NORMAL + + if trend == "up": + return self._colors.get("trend_up", curses.A_BOLD) + elif trend == "down": + return self._colors.get("trend_down", curses.A_BOLD) + else: # stable + return self._colors.get("trend_stable", curses.A_NORMAL) + + def update_metrics(self, key: Any, metrics: Dict[str, float]) -> Dict[str, TrendDirection]: + """ + Update multiple metrics at once and get their trends. + + Args: + key: Identifier for the entity + metrics: Dictionary of metric_name -> value + + Returns: + Dictionary of metric_name -> trend_direction + """ + trends = {} + for metric, value in metrics.items(): + trends[metric] = self.update(key, metric, value) + return trends + + def clear(self) -> None: + """Clear all tracked values (useful on stats reset).""" + self._previous_values.clear() + + def __repr__(self) -> str: + """String representation for debugging.""" + status = "enabled" if self._enabled else "disabled" + tracked = len(self._previous_values) + return f"TrendTracker({status}, tracking {tracked} entities)" diff --git a/Lib/profiling/sampling/live_collector/widgets.py b/Lib/profiling/sampling/live_collector/widgets.py new file mode 100644 index 00000000000..2af8caa2c2f --- /dev/null +++ b/Lib/profiling/sampling/live_collector/widgets.py @@ -0,0 +1,963 @@ +"""Widget classes for the live profiling collector UI.""" + +import curses +import time +from abc import ABC, abstractmethod + +from .constants import ( + TOP_FUNCTIONS_DISPLAY_COUNT, + MIN_FUNC_NAME_WIDTH, + MAX_FUNC_NAME_WIDTH, + WIDTH_THRESHOLD_SAMPLE_PCT, + WIDTH_THRESHOLD_TOTTIME, + WIDTH_THRESHOLD_CUMUL_PCT, + WIDTH_THRESHOLD_CUMTIME, + MICROSECONDS_PER_SECOND, + DISPLAY_UPDATE_INTERVAL, + MIN_BAR_WIDTH, + MAX_SAMPLE_RATE_BAR_WIDTH, + MAX_EFFICIENCY_BAR_WIDTH, + MIN_SAMPLE_RATE_FOR_SCALING, + FOOTER_LINES, + FINISHED_BANNER_EXTRA_LINES, +) +from ..constants import ( + THREAD_STATUS_HAS_GIL, + THREAD_STATUS_ON_CPU, + THREAD_STATUS_UNKNOWN, + THREAD_STATUS_GIL_REQUESTED, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_WALL, +) + + +class Widget(ABC): + """Base class for UI widgets.""" + + def __init__(self, display, colors): + """ + Initialize widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + """ + self.display = display + self.colors = colors + + @abstractmethod + def render(self, line, width, **kwargs): + """ + Render the widget starting at the given line. + + Args: + line: Starting line number + width: Available width + **kwargs: Additional rendering parameters + + Returns: + Next available line number after rendering + """ + pass + + def add_str(self, line, col, text, attr=0): + """Add a string to the display at the specified position.""" + self.display.add_str(line, col, text, attr) + + +class ProgressBarWidget(Widget): + """Reusable progress bar widget.""" + + def render(self, line, width, **kwargs): + """Render is not used for progress bars - use render_bar instead.""" + raise NotImplementedError("Use render_bar method instead") + + def render_bar( + self, filled, total, max_width, fill_char="█", empty_char="░" + ): + """ + Render a progress bar and return the bar string and its length. + + Args: + filled: Current filled amount + total: Total amount (max value) + max_width: Maximum width for the bar + fill_char: Character to use for filled portion + empty_char: Character to use for empty portion + + Returns: + Tuple of (bar_string, bar_length) + """ + bar_width = min(max_width, max_width) + normalized = min(filled / max(total, 1), 1.0) + bar_fill = int(normalized * bar_width) + + bar = "[" + for i in range(bar_width): + if i < bar_fill: + bar += fill_char + else: + bar += empty_char + bar += "]" + return bar, len(bar) + + +class HeaderWidget(Widget): + """Widget for rendering the header section (lines 0-8).""" + + def __init__(self, display, colors, collector): + """ + Initialize header widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing stats + """ + super().__init__(display, colors) + self.collector = collector + self.progress_bar = ProgressBarWidget(display, colors) + + def render(self, line, width, **kwargs): + """ + Render the complete header section. + + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'elapsed' key + + Returns: + Next available line number + """ + elapsed = kwargs["elapsed"] + + line = self.draw_header_info(line, width, elapsed) + line = self.draw_sample_stats(line, width, elapsed) + line = self.draw_efficiency_bar(line, width) + line = self.draw_thread_status(line, width) + line = self.draw_function_stats( + line, width, kwargs.get("stats_list", []) + ) + line = self.draw_top_functions( + line, width, kwargs.get("stats_list", []) + ) + + # Show prominent finished banner if profiling is complete + if self.collector.finished: + line = self.draw_finished_banner(line, width) + + # Separator + A_DIM = self.display.get_attr("A_DIM") + separator = "─" * (width - 1) + self.add_str(line, 0, separator[: width - 1], A_DIM) + line += 1 + + return line + + def format_uptime(self, elapsed): + """Format elapsed time as uptime string.""" + uptime_sec = int(elapsed) + hours = uptime_sec // 3600 + minutes = (uptime_sec % 3600) // 60 + seconds = uptime_sec % 60 + if hours > 0: + return f"{hours}h{minutes:02d}m{seconds:02d}s" + else: + return f"{minutes}m{seconds:02d}s" + + def draw_header_info(self, line, width, elapsed): + """Draw the header information line with PID, uptime, time, and interval.""" + # Draw title + A_BOLD = self.display.get_attr("A_BOLD") + title = "Tachyon Profiler" + self.add_str(line, 0, title, A_BOLD | self.colors["cyan"]) + line += 1 + + current_time = self.collector.current_time_display + uptime = self.format_uptime(elapsed) + + # Calculate display refresh rate + refresh_hz = ( + 1.0 / self.collector.display_update_interval if self.collector.display_update_interval > 0 else 0 + ) + + # Get current view mode and thread display + if self.collector.view_mode == "ALL": + thread_name = "ALL" + thread_color = self.colors["green"] + else: + # PER_THREAD mode + if self.collector.current_thread_index < len( + self.collector.thread_ids + ): + thread_id = self.collector.thread_ids[ + self.collector.current_thread_index + ] + num_threads = len(self.collector.thread_ids) + thread_name = f"{thread_id} ({self.collector.current_thread_index + 1}/{num_threads})" + thread_color = self.colors["magenta"] + else: + thread_name = "ALL" + thread_color = self.colors["green"] + + header_parts = [ + ("PID: ", curses.A_BOLD), + (f"{self.collector.pid}", self.colors["cyan"]), + (" │ ", curses.A_DIM), + ("Thread: ", curses.A_BOLD), + (thread_name, thread_color), + (" │ ", curses.A_DIM), + ("Uptime: ", curses.A_BOLD), + (uptime, self.colors["green"]), + (" │ ", curses.A_DIM), + ("Time: ", curses.A_BOLD), + (current_time, self.colors["yellow"]), + (" │ ", curses.A_DIM), + ("Interval: ", curses.A_BOLD), + ( + f"{self.collector.sample_interval_usec}µs", + self.colors["magenta"], + ), + (" │ ", curses.A_DIM), + ("Display: ", curses.A_BOLD), + (f"{refresh_hz:.1f}Hz", self.colors["cyan"]), + ] + + col = 0 + for text, attr in header_parts: + if col < width - 1: + self.add_str(line, col, text, attr) + col += len(text) + return line + 1 + + def format_rate_with_units(self, rate_hz): + """Format a rate in Hz with appropriate units (Hz, KHz, MHz).""" + if rate_hz >= 1_000_000: + return f"{rate_hz / 1_000_000:.1f}MHz" + elif rate_hz >= 1_000: + return f"{rate_hz / 1_000:.1f}KHz" + else: + return f"{rate_hz:.1f}Hz" + + def draw_sample_stats(self, line, width, elapsed): + """Draw sample statistics with visual progress bar.""" + sample_rate = ( + self.collector.total_samples / elapsed if elapsed > 0 else 0 + ) + + # Update max sample rate + if sample_rate > self.collector.max_sample_rate: + self.collector.max_sample_rate = sample_rate + + col = 0 + self.add_str(line, col, "Samples: ", curses.A_BOLD) + col += 9 + self.add_str( + line, + col, + f"{self.collector.total_samples:>8}", + self.colors["cyan"], + ) + col += 8 + self.add_str( + line, col, f" total ({sample_rate:>7.1f}/s) ", curses.A_NORMAL + ) + col += 23 + + # Draw sample rate bar + target_rate = ( + MICROSECONDS_PER_SECOND / self.collector.sample_interval_usec + ) + + # Show current/target ratio with percentage + if sample_rate > 0 and target_rate > 0: + percentage = min((sample_rate / target_rate) * 100, 100) + current_formatted = self.format_rate_with_units(sample_rate) + target_formatted = self.format_rate_with_units(target_rate) + + if percentage >= 99.5: # Show 100% when very close + rate_label = f" {current_formatted}/{target_formatted} (100%)" + else: + rate_label = f" {current_formatted}/{target_formatted} ({percentage:>4.1f}%)" + else: + target_formatted = self.format_rate_with_units(target_rate) + rate_label = f" target: {target_formatted}" + + available_width = width - col - len(rate_label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_SAMPLE_RATE_BAR_WIDTH, available_width) + # Use target rate as the reference, with a minimum for scaling + reference_rate = max(target_rate, MIN_SAMPLE_RATE_FOR_SCALING) + normalized_rate = min(sample_rate / reference_rate, 1.0) + bar_fill = int(normalized_rate * bar_width) + + bar = "[" + for i in range(bar_width): + bar += "█" if i < bar_fill else "░" + bar += "]" + self.add_str(line, col, bar, self.colors["green"]) + col += len(bar) + + if col + len(rate_label) < width - 1: + self.add_str(line, col + 1, rate_label, curses.A_DIM) + return line + 1 + + def draw_efficiency_bar(self, line, width): + """Draw sample efficiency bar showing success/failure rates.""" + success_pct = ( + self.collector.successful_samples + / max(1, self.collector.total_samples) + ) * 100 + failed_pct = ( + self.collector.failed_samples + / max(1, self.collector.total_samples) + ) * 100 + + col = 0 + self.add_str(line, col, "Efficiency:", curses.A_BOLD) + col += 11 + + label = f" {success_pct:>5.2f}% good, {failed_pct:>4.2f}% failed" + available_width = width - col - len(label) - 3 + + if available_width >= MIN_BAR_WIDTH: + bar_width = min(MAX_EFFICIENCY_BAR_WIDTH, available_width) + success_fill = int( + ( + self.collector.successful_samples + / max(1, self.collector.total_samples) + ) + * bar_width + ) + failed_fill = bar_width - success_fill + + self.add_str(line, col, "[", curses.A_NORMAL) + col += 1 + if success_fill > 0: + self.add_str( + line, col, "█" * success_fill, self.colors["green"] + ) + col += success_fill + if failed_fill > 0: + self.add_str(line, col, "█" * failed_fill, self.colors["red"]) + col += failed_fill + self.add_str(line, col, "]", curses.A_NORMAL) + col += 1 + + self.add_str(line, col + 1, label, curses.A_NORMAL) + return line + 1 + + def _add_percentage_stat( + self, line, col, value, label, color, add_separator=False + ): + """Add a percentage stat to the display. + + Args: + line: Line number + col: Starting column + value: Percentage value + label: Label text + color: Color attribute + add_separator: Whether to add separator before the stat + + Returns: + Updated column position + """ + if add_separator: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + + self.add_str(line, col, f"{value:>4.1f}", color) + col += 4 + self.add_str(line, col, f"% {label}", curses.A_NORMAL) + col += len(label) + 2 + + return col + + def draw_thread_status(self, line, width): + """Draw thread status statistics and GC information.""" + # Get status counts for current view mode + thread_data = self.collector._get_current_thread_data() + status_counts = thread_data.as_status_dict() if thread_data else self.collector.thread_status_counts + + # Calculate percentages + total_threads = max(1, status_counts["total"]) + pct_on_gil = (status_counts["has_gil"] / total_threads) * 100 + pct_off_gil = 100.0 - pct_on_gil + pct_gil_requested = (status_counts["gil_requested"] / total_threads) * 100 + + # Get GC percentage based on view mode + if thread_data: + total_samples = max(1, thread_data.sample_count) + pct_gc = (thread_data.gc_frame_samples / total_samples) * 100 + else: + total_samples = max(1, self.collector.total_samples) + pct_gc = (self.collector.gc_frame_samples / total_samples) * 100 + + col = 0 + self.add_str(line, col, "Threads: ", curses.A_BOLD) + col += 11 + + # Show GIL stats only if mode is not GIL (GIL mode filters to only GIL holders) + if self.collector.mode != PROFILING_MODE_GIL: + col = self._add_percentage_stat( + line, col, pct_on_gil, "on gil", self.colors["green"] + ) + col = self._add_percentage_stat( + line, + col, + pct_off_gil, + "off gil", + self.colors["red"], + add_separator=True, + ) + + # Show "waiting for gil" only if mode is not GIL + if self.collector.mode != PROFILING_MODE_GIL and col < width - 30: + col = self._add_percentage_stat( + line, + col, + pct_gil_requested, + "waiting for gil", + self.colors["yellow"], + add_separator=True, + ) + + # Always show GC stats + if col < width - 15: + col = self._add_percentage_stat( + line, + col, + pct_gc, + "GC", + self.colors["magenta"], + add_separator=(col > 11), + ) + + return line + 1 + + def draw_function_stats(self, line, width, stats_list): + """Draw function statistics summary.""" + result_set = self.collector._get_current_result_source() + total_funcs = len(result_set) + funcs_shown = len(stats_list) + executing_funcs = sum( + 1 for f in result_set.values() if f.get("direct_calls", 0) > 0 + ) + stack_only = total_funcs - executing_funcs + + col = 0 + self.add_str(line, col, "Functions: ", curses.A_BOLD) + col += 11 + self.add_str(line, col, f"{total_funcs:>5}", self.colors["cyan"]) + col += 5 + self.add_str(line, col, " total", curses.A_NORMAL) + col += 6 + + if col < width - 25: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str( + line, col, f"{executing_funcs:>5}", self.colors["green"] + ) + col += 5 + self.add_str(line, col, " exec", curses.A_NORMAL) + col += 5 + + if col < width - 25: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str(line, col, f"{stack_only:>5}", self.colors["yellow"]) + col += 5 + self.add_str(line, col, " stack", curses.A_NORMAL) + col += 6 + + if col < width - 20: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + self.add_str( + line, col, f"{funcs_shown:>5}", self.colors["magenta"] + ) + col += 5 + self.add_str(line, col, " shown", curses.A_NORMAL) + return line + 1 + + def draw_top_functions(self, line, width, stats_list): + """Draw top N hottest functions.""" + col = 0 + self.add_str( + line, + col, + f"Top {TOP_FUNCTIONS_DISPLAY_COUNT}: ", + curses.A_BOLD, + ) + col += 11 + + top_by_samples = sorted( + stats_list, key=lambda x: x["direct_calls"], reverse=True + ) + emojis = ["🥇", "🥈", "🥉"] + medal_colors = [ + self.colors["red"], + self.colors["yellow"], + self.colors["green"], + ] + + displayed = 0 + for func_data in top_by_samples: + if displayed >= TOP_FUNCTIONS_DISPLAY_COUNT: + break + if col >= width - 20: + break + if func_data["direct_calls"] == 0: + continue + + func_name = func_data["func"][2] + func_pct = ( + func_data["direct_calls"] + / max(1, self.collector.total_samples) + ) * 100 + + # Medal emoji + if col + 3 < width - 15: + self.add_str( + line, col, emojis[displayed] + " ", medal_colors[displayed] + ) + col += 3 + + # Function name (truncate to fit) + available_for_name = width - col - 15 + max_name_len = min(25, max(5, available_for_name)) + if len(func_name) > max_name_len: + func_name = func_name[: max_name_len - 3] + "..." + + if col + len(func_name) < width - 10: + self.add_str(line, col, func_name, medal_colors[displayed]) + col += len(func_name) + + pct_str = ( + f" ({func_pct:.1f}%)" + if func_pct >= 0.1 + else f" ({func_data['direct_calls']})" + ) + self.add_str(line, col, pct_str, curses.A_DIM) + col += len(pct_str) + + displayed += 1 + + if displayed < 3 and col < width - 30: + self.add_str(line, col, " │ ", curses.A_DIM) + col += 3 + + if displayed == 0 and col < width - 25: + self.add_str(line, col, "(collecting samples...)", curses.A_DIM) + + return line + 1 + + def draw_finished_banner(self, line, width): + """Draw a prominent banner when profiling is finished.""" + A_REVERSE = self.display.get_attr("A_REVERSE") + A_BOLD = self.display.get_attr("A_BOLD") + + # Add blank line for separation + line += 1 + + # Create the banner message + message = " ✓ PROFILING COMPLETE - Final Results Below - Press 'q' to Quit " + + # Center the message and fill the width with reverse video + if len(message) < width - 1: + padding_total = width - len(message) - 1 + padding_left = padding_total // 2 + padding_right = padding_total - padding_left + full_message = " " * padding_left + message + " " * padding_right + else: + full_message = message[: width - 1] + + # Draw the banner with reverse video and bold + self.add_str( + line, 0, full_message, A_REVERSE | A_BOLD | self.colors["green"] + ) + line += 1 + + # Add blank line for separation + line += 1 + + return line + + +class TableWidget(Widget): + """Widget for rendering column headers and data rows.""" + + def __init__(self, display, colors, collector): + """ + Initialize table widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing stats + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render column headers and data rows. + + Args: + line: Starting line number + width: Available width + kwargs: Must contain 'height' and 'stats_list' keys + + Returns: + Next available line number + """ + height = kwargs["height"] + stats_list = kwargs["stats_list"] + + # Draw column headers + line, show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + self.draw_column_headers(line, width) + ) + column_flags = ( + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + # Draw data rows + line = self.draw_stats_rows( + line, height, width, stats_list, column_flags + ) + + return line + + def draw_column_headers(self, line, width): + """Draw column headers with sort indicators.""" + col = 0 + + # Determine which columns to show based on width + show_sample_pct = width >= WIDTH_THRESHOLD_SAMPLE_PCT + show_tottime = width >= WIDTH_THRESHOLD_TOTTIME + show_cumul_pct = width >= WIDTH_THRESHOLD_CUMUL_PCT + show_cumtime = width >= WIDTH_THRESHOLD_CUMTIME + + sorted_header = self.colors["sorted_header"] + normal_header = self.colors["normal_header"] + + # Determine which column is sorted + sort_col = { + "nsamples": 0, + "sample_pct": 1, + "tottime": 2, + "cumul_pct": 3, + "cumtime": 4, + }.get(self.collector.sort_by, -1) + + # Column 0: nsamples + attr = sorted_header if sort_col == 0 else normal_header + text = f"{'▼nsamples' if sort_col == 0 else 'nsamples':>13}" + self.add_str(line, col, text, attr) + col += 15 + + # Column 1: sample % + if show_sample_pct: + attr = sorted_header if sort_col == 1 else normal_header + text = f"{'▼%' if sort_col == 1 else '%':>5}" + self.add_str(line, col, text, attr) + col += 7 + + # Column 2: tottime + if show_tottime: + attr = sorted_header if sort_col == 2 else normal_header + text = f"{'▼tottime' if sort_col == 2 else 'tottime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Column 3: cumul % + if show_cumul_pct: + attr = sorted_header if sort_col == 3 else normal_header + text = f"{'▼%' if sort_col == 3 else '%':>5}" + self.add_str(line, col, text, attr) + col += 7 + + # Column 4: cumtime + if show_cumtime: + attr = sorted_header if sort_col == 4 else normal_header + text = f"{'▼cumtime' if sort_col == 4 else 'cumtime':>10}" + self.add_str(line, col, text, attr) + col += 12 + + # Remaining headers + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + self.add_str( + line, col, f"{'function':<{func_width}}", normal_header + ) + col += func_width + 2 + + if col < width - 10: + self.add_str(line, col, "file:line", normal_header) + + return ( + line + 1, + show_sample_pct, + show_tottime, + show_cumul_pct, + show_cumtime, + ) + + def draw_stats_rows(self, line, height, width, stats_list, column_flags): + """Draw the statistics data rows.""" + show_sample_pct, show_tottime, show_cumul_pct, show_cumtime = ( + column_flags + ) + + # Get color attributes from the colors dict (already initialized) + color_samples = self.colors.get("color_samples", curses.A_NORMAL) + color_file = self.colors.get("color_file", curses.A_NORMAL) + color_func = self.colors.get("color_func", curses.A_NORMAL) + + # Get trend tracker for color decisions + trend_tracker = self.collector._trend_tracker + + for stat in stats_list: + if line >= height - FOOTER_LINES: + break + + func = stat["func"] + direct_calls = stat["direct_calls"] + cumulative_calls = stat["cumulative_calls"] + total_time = stat["total_time"] + cumulative_time = stat["cumulative_time"] + trends = stat.get("trends", {}) + + sample_pct = ( + (direct_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + cum_pct = ( + (cumulative_calls / self.collector.total_samples * 100) + if self.collector.total_samples > 0 + else 0 + ) + + # Helper function to get trend color for a specific column + def get_trend_color(column_name): + trend = trends.get(column_name, "stable") + if trend_tracker is not None: + return trend_tracker.get_color(trend) + return curses.A_NORMAL + + filename, lineno, funcname = func[0], func[1], func[2] + samples_str = f"{direct_calls}/{cumulative_calls}" + col = 0 + + # Samples column - apply trend color based on nsamples trend + nsamples_color = get_trend_color("nsamples") + self.add_str(line, col, f"{samples_str:>13}", nsamples_color) + col += 15 + + # Sample % column + if show_sample_pct: + sample_pct_color = get_trend_color("sample_pct") + self.add_str(line, col, f"{sample_pct:>5.1f}", sample_pct_color) + col += 7 + + # Total time column + if show_tottime: + tottime_color = get_trend_color("tottime") + self.add_str(line, col, f"{total_time:>10.3f}", tottime_color) + col += 12 + + # Cumul % column + if show_cumul_pct: + cumul_pct_color = get_trend_color("cumul_pct") + self.add_str(line, col, f"{cum_pct:>5.1f}", cumul_pct_color) + col += 7 + + # Cumul time column + if show_cumtime: + cumtime_color = get_trend_color("cumtime") + self.add_str(line, col, f"{cumulative_time:>10.3f}", cumtime_color) + col += 12 + + # Function name column + if col < width - 15: + remaining_space = width - col - 1 + func_width = min( + MAX_FUNC_NAME_WIDTH, + max(MIN_FUNC_NAME_WIDTH, remaining_space // 2), + ) + + func_display = funcname + if len(funcname) > func_width: + func_display = funcname[: func_width - 3] + "..." + func_display = f"{func_display:<{func_width}}" + self.add_str(line, col, func_display, color_func) + col += func_width + 2 + + # File:line column + if col < width - 10: + simplified_path = self.collector.simplify_path(filename) + file_line = f"{simplified_path}:{lineno}" + remaining_width = width - col - 1 + self.add_str( + line, col, file_line[:remaining_width], color_file + ) + + line += 1 + + return line + + +class FooterWidget(Widget): + """Widget for rendering the footer section (legend and controls).""" + + def __init__(self, display, colors, collector): + """ + Initialize footer widget. + + Args: + display: DisplayInterface implementation + colors: Dictionary of color attributes + collector: Reference to LiveStatsCollector for accessing state + """ + super().__init__(display, colors) + self.collector = collector + + def render(self, line, width, **kwargs): + """ + Render the footer at the specified position. + + Args: + line: Starting line number (should be height - 2) + width: Available width + + Returns: + Next available line number + """ + A_DIM = self.display.get_attr("A_DIM") + A_BOLD = self.display.get_attr("A_BOLD") + + # Legend line + legend = "nsamples: direct/cumulative (direct=executing, cumulative=on stack)" + self.add_str(line, 0, legend[: width - 1], A_DIM) + line += 1 + + # Controls line with status + sort_names = { + "tottime": "Total Time", + "nsamples": "Direct Samples", + "cumtime": "Cumulative Time", + "sample_pct": "Sample %", + "cumul_pct": "Cumulative %", + } + sort_display = sort_names.get( + self.collector.sort_by, self.collector.sort_by + ) + + # Build status indicators + status = [] + if self.collector.finished: + status.append("[PROFILING FINISHED - Press 'q' to quit]") + elif self.collector.paused: + status.append("[PAUSED]") + if self.collector.filter_pattern: + status.append( + f"[Filter: {self.collector.filter_pattern} (c to clear)]" + ) + # Show trend colors status if disabled + if self.collector._trend_tracker is not None and not self.collector._trend_tracker.enabled: + status.append("[Trend colors: OFF]") + status_str = " ".join(status) + " " if status else "" + + if self.collector.finished: + footer = f"{status_str}" + else: + footer = f"{status_str}Sort: {sort_display} | 't':mode 'x':trends ←→:thread 'h':help 'q':quit" + self.add_str( + line, + 0, + footer[: width - 1], + A_BOLD + if (self.collector.paused or self.collector.finished) + else A_DIM, + ) + + return line + 1 + + def render_filter_input_prompt(self, line, width): + """Draw the filter input prompt at the bottom of the screen.""" + A_BOLD = self.display.get_attr("A_BOLD") + A_REVERSE = self.display.get_attr("A_REVERSE") + + # Draw prompt on last line + prompt = f"Function filter: {self.collector.filter_input_buffer}_" + self.add_str(line, 0, prompt[: width - 1], A_REVERSE | A_BOLD) + + +class HelpWidget(Widget): + """Widget for rendering the help screen overlay.""" + + def render(self, line, width, **kwargs): + """ + Render the help screen. + + Args: + line: Starting line number (ignored, help is centered) + width: Available width + kwargs: Must contain 'height' key + + Returns: + Next available line number (not used for overlays) + """ + height = kwargs["height"] + A_BOLD = self.display.get_attr("A_BOLD") + A_NORMAL = self.display.get_attr("A_NORMAL") + + help_lines = [ + ("Tachyon Profiler - Interactive Commands", A_BOLD), + ("", A_NORMAL), + ("Navigation & Display:", A_BOLD), + (" s - Cycle through sort modes (forward)", A_NORMAL), + (" S - Cycle through sort modes (backward)", A_NORMAL), + (" t - Toggle view mode (ALL / per-thread)", A_NORMAL), + (" x - Toggle trend colors (on/off)", A_NORMAL), + (" ← → ↑ ↓ - Navigate threads (in per-thread mode)", A_NORMAL), + (" + - Faster display refresh rate", A_NORMAL), + (" - - Slower display refresh rate", A_NORMAL), + ("", A_NORMAL), + ("Control:", A_BOLD), + (" p - Freeze display (snapshot)", A_NORMAL), + (" r - Reset all statistics", A_NORMAL), + ("", A_NORMAL), + ("Filtering:", A_BOLD), + (" / - Enter function filter (substring)", A_NORMAL), + (" c - Clear filter", A_NORMAL), + (" ESC - Cancel filter input", A_NORMAL), + ("", A_NORMAL), + ("Other:", A_BOLD), + (" h or ? - Show/hide this help", A_NORMAL), + (" q - Quit profiler", A_NORMAL), + ("", A_NORMAL), + ("Press any key to close this help screen", A_BOLD), + ] + + start_line = (height - len(help_lines)) // 2 + for i, (text, attr) in enumerate(help_lines): + if start_line + i < height - 1: + col = 2 # Left-align with small margin + self.add_str(start_line + i, col, text[: width - 3], attr) + + return line # Not used for overlays diff --git a/Lib/profiling/sampling/pstats_collector.py b/Lib/profiling/sampling/pstats_collector.py index e06dbf40aa1..b8b37a10c43 100644 --- a/Lib/profiling/sampling/pstats_collector.py +++ b/Lib/profiling/sampling/pstats_collector.py @@ -1,6 +1,7 @@ import collections import marshal +from _colorize import ANSIColors from .collector import Collector @@ -70,3 +71,342 @@ def create_stats(self): cumulative, callers, ) + + def print_stats(self, sort=-1, limit=None, show_summary=True, mode=None): + """Print formatted statistics to stdout.""" + import pstats + from .constants import PROFILING_MODE_CPU + + # Create stats object + stats = pstats.SampledStats(self).strip_dirs() + if not stats.stats: + print("No samples were collected.") + if mode == PROFILING_MODE_CPU: + print("This can happen in CPU mode when all threads are idle.") + return + + # Get the stats data + stats_list = [] + for func, ( + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats.stats.items(): + stats_list.append( + ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) + ) + + # Calculate total samples for percentage calculations (using direct_calls) + total_samples = sum( + direct_calls for _, direct_calls, _, _, _, _ in stats_list + ) + + # Sort based on the requested field + sort_field = sort + if sort_field == -1: # stdname + stats_list.sort(key=lambda x: str(x[0])) + elif sort_field == 0: # nsamples (direct samples) + stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls + elif sort_field == 1: # tottime + stats_list.sort(key=lambda x: x[3], reverse=True) # total_time + elif sort_field == 2: # cumtime + stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time + elif sort_field == 3: # sample% + stats_list.sort( + key=lambda x: (x[1] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # direct_calls percentage + ) + elif sort_field == 4: # cumul% + stats_list.sort( + key=lambda x: (x[2] / total_samples * 100) + if total_samples > 0 + else 0, + reverse=True, # cumulative_calls percentage + ) + elif sort_field == 5: # nsamples (cumulative samples) + stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls + + # Apply limit if specified + if limit is not None: + stats_list = stats_list[:limit] + + # Determine the best unit for time columns based on maximum values + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in stats_list), default=0 + ) + max_cumulative_time = max( + (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), + default=0, + ) + + total_time_unit, total_time_scale = self._determine_best_unit(max_total_time) + cumulative_time_unit, cumulative_time_scale = self._determine_best_unit( + max_cumulative_time + ) + + # Define column widths for consistent alignment + col_widths = { + "nsamples": 15, # "nsamples" column (inline/cumulative format) + "sample_pct": 8, # "sample%" column + "tottime": max(12, len(f"tottime ({total_time_unit})")), + "cum_pct": 8, # "cumul%" column + "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), + } + + # Print header with colors and proper alignment + print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") + + header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" + header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" + header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" + header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" + header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" + header_filename = ( + f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" + ) + + print( + f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" + ) + + # Print each line with proper alignment + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Calculate percentages + sample_pct = ( + (direct_calls / total_samples * 100) if total_samples > 0 else 0 + ) + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + + # Format values with proper alignment - always use A/B format + nsamples_str = f"{direct_calls}/{cumulative_calls}" + nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" + sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" + tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" + cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" + cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" + + # Format the function name with colors + func_name = ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + # Print the formatted line with consistent spacing + print( + f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" + ) + + # Print legend + print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") + print( + f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" + ) + print( + f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" + ) + print( + f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" + ) + print( + f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" + ) + print( + f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" + ) + print( + f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" + ) + + # Print summary of interesting functions if enabled + if show_summary and stats_list: + self._print_summary(stats_list, total_samples) + + @staticmethod + def _determine_best_unit(max_value): + """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" + if max_value >= 1.0: + return "s", 1.0 + elif max_value >= 0.001: + return "ms", 1000.0 + else: + return "μs", 1000000.0 + + def _print_summary(self, stats_list, total_samples): + """Print summary of interesting functions.""" + print( + f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" + ) + + # Aggregate stats by fully qualified function name (ignoring line numbers) + func_aggregated = {} + for ( + func, + direct_calls, + cumulative_calls, + total_time, + cumulative_time, + callers, + ) in stats_list: + # Use filename:function_name as the key to get fully qualified name + qualified_name = f"{func[0]}:{func[2]}" + if qualified_name not in func_aggregated: + func_aggregated[qualified_name] = [ + 0, + 0, + 0, + 0, + ] # direct_calls, cumulative_calls, total_time, cumulative_time + func_aggregated[qualified_name][0] += direct_calls + func_aggregated[qualified_name][1] += cumulative_calls + func_aggregated[qualified_name][2] += total_time + func_aggregated[qualified_name][3] += cumulative_time + + # Convert aggregated data back to list format for processing + aggregated_stats = [] + for qualified_name, ( + prim_calls, + total_calls, + total_time, + cumulative_time, + ) in func_aggregated.items(): + # Parse the qualified name back to filename and function name + if ":" in qualified_name: + filename, func_name = qualified_name.rsplit(":", 1) + else: + filename, func_name = "", qualified_name + # Create a dummy func tuple with filename and function name for display + dummy_func = (filename, "", func_name) + aggregated_stats.append( + ( + dummy_func, + prim_calls, + total_calls, + total_time, + cumulative_time, + {}, + ) + ) + + # Determine best units for summary metrics + max_total_time = max( + (total_time for _, _, _, total_time, _, _ in aggregated_stats), + default=0, + ) + max_cumulative_time = max( + ( + cumulative_time + for _, _, _, _, cumulative_time, _ in aggregated_stats + ), + default=0, + ) + + total_unit, total_scale = self._determine_best_unit(max_total_time) + cumulative_unit, cumulative_scale = self._determine_best_unit( + max_cumulative_time + ) + + def _format_func_name(func): + """Format function name with colors.""" + return ( + f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" + f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" + f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" + ) + + def _print_top_functions(stats_list, title, key_func, format_line, n=3): + """Print top N functions sorted by key_func with formatted output.""" + print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") + sorted_stats = sorted(stats_list, key=key_func, reverse=True) + for stat in sorted_stats[:n]: + if line := format_line(stat): + print(f" {line}") + + # Functions with highest direct/cumulative ratio (hot spots) + def format_hotspots(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > 0: + ratio = direct_calls / cumulative_calls + direct_pct = ( + (direct_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{ratio:.3f} direct/cumulative ratio, " + f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Direct/Cumulative Ratio (Hot Spots)", + key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, + format_line=format_hotspots, + ) + + # Functions with highest call frequency (cumulative/direct difference) + def format_call_frequency(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if cumulative_calls > direct_calls: + call_frequency = cumulative_calls - direct_calls + cum_pct = ( + (cumulative_calls / total_samples * 100) + if total_samples > 0 + else 0 + ) + return ( + f"{call_frequency:d} indirect calls, " + f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Frequency (Indirect Calls)", + key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) + format_line=format_call_frequency, + ) + + # Functions with highest cumulative-to-direct multiplier (call magnification) + def format_call_magnification(stat): + func, direct_calls, cumulative_calls, total_time, _, _ = stat + if direct_calls > 0 and cumulative_calls > direct_calls: + multiplier = cumulative_calls / direct_calls + indirect_calls = cumulative_calls - direct_calls + return ( + f"{multiplier:.1f}x call magnification, " + f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" + ) + return None + + _print_top_functions( + aggregated_stats, + "Functions with Highest Call Magnification (Cumulative/Direct)", + key_func=lambda x: (x[2] / x[1]) + if x[1] > 0 + else 0, # Sort by cumulative/direct ratio + format_line=format_call_magnification, + ) diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 7a0f739a542..46fc1a05afa 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -1,9 +1,6 @@ -import argparse import _remote_debugging import os import pstats -import socket -import subprocess import statistics import sys import sysconfig @@ -13,141 +10,38 @@ from .pstats_collector import PstatsCollector from .stack_collector import CollapsedStackCollector, FlamegraphCollector +from .heatmap_collector import HeatmapCollector from .gecko_collector import GeckoCollector +from .constants import ( + PROFILING_MODE_WALL, + PROFILING_MODE_CPU, + PROFILING_MODE_GIL, + PROFILING_MODE_ALL, +) +try: + from .live_collector import LiveStatsCollector +except ImportError: + LiveStatsCollector = None _FREE_THREADED_BUILD = sysconfig.get_config_var("Py_GIL_DISABLED") is not None -# Profiling mode constants -PROFILING_MODE_WALL = 0 -PROFILING_MODE_CPU = 1 -PROFILING_MODE_GIL = 2 - - -def _parse_mode(mode_string): - """Convert mode string to mode constant.""" - mode_map = { - "wall": PROFILING_MODE_WALL, - "cpu": PROFILING_MODE_CPU, - "gil": PROFILING_MODE_GIL, - } - return mode_map[mode_string] -_HELP_DESCRIPTION = """Sample a process's stack frames and generate profiling data. -Supports the following target modes: - - -p PID: Profile an existing process by PID - - -m MODULE [ARGS...]: Profile a module as python -m module ... - - filename [ARGS...]: Profile the specified script by running it in a subprocess - -Supports the following output formats: - - --pstats: Detailed profiling statistics with sorting options - - --collapsed: Stack traces for generating flamegraphs - - --flamegraph Interactive HTML flamegraph visualization (requires web browser) - -Examples: - # Profile process 1234 for 10 seconds with default settings - python -m profiling.sampling -p 1234 - - # Profile a script by running it in a subprocess - python -m profiling.sampling myscript.py arg1 arg2 - - # Profile a module by running it as python -m module in a subprocess - python -m profiling.sampling -m mymodule arg1 arg2 - - # Profile with custom interval and duration, save to file - python -m profiling.sampling -i 50 -d 30 -o profile.stats -p 1234 - - # Generate collapsed stacks for flamegraph - python -m profiling.sampling --collapsed -p 1234 - - # Generate a HTML flamegraph - python -m profiling.sampling --flamegraph -p 1234 - - # Profile all threads, sort by total time - python -m profiling.sampling -a --sort-tottime -p 1234 - - # Profile for 1 minute with 1ms sampling interval - python -m profiling.sampling -i 1000 -d 60 -p 1234 - - # Show only top 20 functions sorted by direct samples - python -m profiling.sampling --sort-nsamples -l 20 -p 1234 - - # Profile all threads and save collapsed stacks - python -m profiling.sampling -a --collapsed -o stacks.txt -p 1234 - - # Profile with real-time sampling statistics - python -m profiling.sampling --realtime-stats -p 1234 - - # Sort by sample percentage to find most sampled functions - python -m profiling.sampling --sort-sample-pct -p 1234 - - # Sort by cumulative samples to find functions most on call stack - python -m profiling.sampling --sort-nsamples-cumul -p 1234""" - - -# Constants for socket synchronization -_SYNC_TIMEOUT = 5.0 -_PROCESS_KILL_TIMEOUT = 2.0 -_READY_MESSAGE = b"ready" -_RECV_BUFFER_SIZE = 1024 - - -def _run_with_sync(original_cmd): - """Run a command with socket-based synchronization and return the process.""" - # Create a TCP socket for synchronization with better socket options - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sync_sock: - # Set SO_REUSEADDR to avoid "Address already in use" errors - sync_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sync_sock.bind(("127.0.0.1", 0)) # Let OS choose a free port - sync_port = sync_sock.getsockname()[1] - sync_sock.listen(1) - sync_sock.settimeout(_SYNC_TIMEOUT) - - # Get current working directory to preserve it - cwd = os.getcwd() - - # Build command using the sync coordinator - target_args = original_cmd[1:] # Remove python executable - cmd = (sys.executable, "-m", "profiling.sampling._sync_coordinator", str(sync_port), cwd) + tuple(target_args) - - # Start the process with coordinator - process = subprocess.Popen(cmd) - - try: - # Wait for ready signal with timeout - with sync_sock.accept()[0] as conn: - ready_signal = conn.recv(_RECV_BUFFER_SIZE) - - if ready_signal != _READY_MESSAGE: - raise RuntimeError(f"Invalid ready signal received: {ready_signal!r}") - - except socket.timeout: - # If we timeout, kill the process and raise an error - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=_PROCESS_KILL_TIMEOUT) - except subprocess.TimeoutExpired: - process.kill() - process.wait() - raise RuntimeError("Process failed to signal readiness within timeout") - - return process - - - class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=False, gc=True, skip_non_matching_threads=True): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads + self.mode = mode # Store mode for later use if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads, mode=mode + self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc, + skip_non_matching_threads=skip_non_matching_threads ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, only_active_thread=only_active_threads, mode=mode + self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc, + skip_non_matching_threads=skip_non_matching_threads ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -163,63 +57,78 @@ def sample(self, collector, duration_sec=10): last_sample_time = start_time realtime_update_interval = 1.0 # Update every second last_realtime_update = start_time + interrupted = False - while running_time < duration_sec: - current_time = time.perf_counter() - if next_time < current_time: - try: - stack_frames = self.unwinder.get_stack_trace() - collector.collect(stack_frames) - except ProcessLookupError: - duration_sec = current_time - start_time + try: + while running_time < duration_sec: + # Check if live collector wants to stop + if hasattr(collector, 'running') and not collector.running: break - except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): - errors += 1 - except Exception as e: - if not self._is_process_running(): + + current_time = time.perf_counter() + if next_time < current_time: + try: + stack_frames = self.unwinder.get_stack_trace() + collector.collect(stack_frames) + except ProcessLookupError: + duration_sec = current_time - start_time break - raise e from None + except (RuntimeError, UnicodeDecodeError, MemoryError, OSError): + collector.collect_failed_sample() + errors += 1 + except Exception as e: + if not self._is_process_running(): + break + raise e from None - # Track actual sampling intervals for real-time stats - if num_samples > 0: - actual_interval = current_time - last_sample_time - self.sample_intervals.append( - 1.0 / actual_interval - ) # Convert to Hz - self.total_samples += 1 + # Track actual sampling intervals for real-time stats + if num_samples > 0: + actual_interval = current_time - last_sample_time + self.sample_intervals.append( + 1.0 / actual_interval + ) # Convert to Hz + self.total_samples += 1 - # Print real-time statistics if enabled - if ( - self.realtime_stats - and (current_time - last_realtime_update) - >= realtime_update_interval - ): - self._print_realtime_stats() - last_realtime_update = current_time + # Print real-time statistics if enabled + if ( + self.realtime_stats + and (current_time - last_realtime_update) + >= realtime_update_interval + ): + self._print_realtime_stats() + last_realtime_update = current_time - last_sample_time = current_time - num_samples += 1 - next_time += sample_interval_sec + last_sample_time = current_time + num_samples += 1 + next_time += sample_interval_sec + running_time = time.perf_counter() - start_time + except KeyboardInterrupt: + interrupted = True running_time = time.perf_counter() - start_time + print("Interrupted by user.") # Clear real-time stats line if it was being displayed if self.realtime_stats and len(self.sample_intervals) > 0: print() # Add newline after real-time stats - sample_rate = num_samples / running_time + sample_rate = num_samples / running_time if running_time > 0 else 0 error_rate = (errors / num_samples) * 100 if num_samples > 0 else 0 + expected_samples = int(duration_sec / sample_interval_sec) + missed_samples = (expected_samples - num_samples) / expected_samples * 100 if expected_samples > 0 else 0 - print(f"Captured {num_samples} samples in {running_time:.2f} seconds") - print(f"Sample rate: {sample_rate:.2f} samples/sec") - print(f"Error rate: {error_rate:.2f}%") + # Don't print stats for live mode (curses is handling display) + is_live_mode = LiveStatsCollector is not None and isinstance(collector, LiveStatsCollector) + if not is_live_mode: + print(f"Captured {num_samples} samples in {running_time:.2f} seconds") + print(f"Sample rate: {sample_rate:.2f} samples/sec") + print(f"Error rate: {error_rate:.2f}%") # Pass stats to flamegraph collector if it's the right type if hasattr(collector, 'set_stats'): - collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate) + collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode) - expected_samples = int(duration_sec / sample_interval_sec) - if num_samples < expected_samples: + if num_samples < expected_samples and not is_live_mode and not interrupted: print( f"Warning: missed {expected_samples - num_samples} samples " f"from the expected total of {expected_samples} " @@ -274,668 +183,126 @@ def _print_realtime_stats(self): ) -def _determine_best_unit(max_value): - """Determine the best unit (s, ms, μs) and scale factor for a maximum value.""" - if max_value >= 1.0: - return "s", 1.0 - elif max_value >= 0.001: - return "ms", 1000.0 - else: - return "μs", 1000000.0 - - -def print_sampled_stats( - stats, sort=-1, limit=None, show_summary=True, sample_interval_usec=100 -): - # Get the stats data - stats_list = [] - for func, ( - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats.stats.items(): - stats_list.append( - ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) - ) - - # Calculate total samples for percentage calculations (using direct_calls) - total_samples = sum( - direct_calls for _, direct_calls, _, _, _, _ in stats_list - ) - - # Sort based on the requested field - sort_field = sort - if sort_field == -1: # stdname - stats_list.sort(key=lambda x: str(x[0])) - elif sort_field == 0: # nsamples (direct samples) - stats_list.sort(key=lambda x: x[1], reverse=True) # direct_calls - elif sort_field == 1: # tottime - stats_list.sort(key=lambda x: x[3], reverse=True) # total_time - elif sort_field == 2: # cumtime - stats_list.sort(key=lambda x: x[4], reverse=True) # cumulative_time - elif sort_field == 3: # sample% - stats_list.sort( - key=lambda x: (x[1] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # direct_calls percentage - ) - elif sort_field == 4: # cumul% - stats_list.sort( - key=lambda x: (x[2] / total_samples * 100) - if total_samples > 0 - else 0, - reverse=True, # cumulative_calls percentage - ) - elif sort_field == 5: # nsamples (cumulative samples) - stats_list.sort(key=lambda x: x[2], reverse=True) # cumulative_calls - - # Apply limit if specified - if limit is not None: - stats_list = stats_list[:limit] - - # Determine the best unit for time columns based on maximum values - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in stats_list), default=0 - ) - max_cumulative_time = max( - (cumulative_time for _, _, _, _, cumulative_time, _ in stats_list), - default=0, - ) - - total_time_unit, total_time_scale = _determine_best_unit(max_total_time) - cumulative_time_unit, cumulative_time_scale = _determine_best_unit( - max_cumulative_time - ) - - # Define column widths for consistent alignment - col_widths = { - "nsamples": 15, # "nsamples" column (inline/cumulative format) - "sample_pct": 8, # "sample%" column - "tottime": max(12, len(f"tottime ({total_time_unit})")), - "cum_pct": 8, # "cumul%" column - "cumtime": max(12, len(f"cumtime ({cumulative_time_unit})")), - } - - # Print header with colors and proper alignment - print(f"{ANSIColors.BOLD_BLUE}Profile Stats:{ANSIColors.RESET}") - - header_nsamples = f"{ANSIColors.BOLD_BLUE}{'nsamples':>{col_widths['nsamples']}}{ANSIColors.RESET}" - header_sample_pct = f"{ANSIColors.BOLD_BLUE}{'sample%':>{col_widths['sample_pct']}}{ANSIColors.RESET}" - header_tottime = f"{ANSIColors.BOLD_BLUE}{f'tottime ({total_time_unit})':>{col_widths['tottime']}}{ANSIColors.RESET}" - header_cum_pct = f"{ANSIColors.BOLD_BLUE}{'cumul%':>{col_widths['cum_pct']}}{ANSIColors.RESET}" - header_cumtime = f"{ANSIColors.BOLD_BLUE}{f'cumtime ({cumulative_time_unit})':>{col_widths['cumtime']}}{ANSIColors.RESET}" - header_filename = ( - f"{ANSIColors.BOLD_BLUE}filename:lineno(function){ANSIColors.RESET}" - ) - - print( - f"{header_nsamples} {header_sample_pct} {header_tottime} {header_cum_pct} {header_cumtime} {header_filename}" - ) - - # Print each line with proper alignment - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: - # Calculate percentages - sample_pct = ( - (direct_calls / total_samples * 100) if total_samples > 0 else 0 - ) - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - - # Format values with proper alignment - always use A/B format - nsamples_str = f"{direct_calls}/{cumulative_calls}" - nsamples_str = f"{nsamples_str:>{col_widths['nsamples']}}" - sample_pct_str = f"{sample_pct:{col_widths['sample_pct']}.1f}" - tottime = f"{total_time * total_time_scale:{col_widths['tottime']}.3f}" - cum_pct_str = f"{cum_pct:{col_widths['cum_pct']}.1f}" - cumtime = f"{cumulative_time * cumulative_time_scale:{col_widths['cumtime']}.3f}" - - # Format the function name with colors - func_name = ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) - - # Print the formatted line with consistent spacing - print( - f"{nsamples_str} {sample_pct_str} {tottime} {cum_pct_str} {cumtime} {func_name}" - ) - - # Print legend - print(f"\n{ANSIColors.BOLD_BLUE}Legend:{ANSIColors.RESET}") - print( - f" {ANSIColors.YELLOW}nsamples{ANSIColors.RESET}: Direct/Cumulative samples (direct executing / on call stack)" - ) - print( - f" {ANSIColors.YELLOW}sample%{ANSIColors.RESET}: Percentage of total samples this function was directly executing" - ) - print( - f" {ANSIColors.YELLOW}tottime{ANSIColors.RESET}: Estimated total time spent directly in this function" - ) - print( - f" {ANSIColors.YELLOW}cumul%{ANSIColors.RESET}: Percentage of total samples when this function was on the call stack" - ) - print( - f" {ANSIColors.YELLOW}cumtime{ANSIColors.RESET}: Estimated cumulative time (including time in called functions)" - ) - print( - f" {ANSIColors.YELLOW}filename:lineno(function){ANSIColors.RESET}: Function location and name" - ) - - def _format_func_name(func): - """Format function name with colors.""" - return ( - f"{ANSIColors.GREEN}{func[0]}{ANSIColors.RESET}:" - f"{ANSIColors.YELLOW}{func[1]}{ANSIColors.RESET}(" - f"{ANSIColors.CYAN}{func[2]}{ANSIColors.RESET})" - ) - - def _print_top_functions(stats_list, title, key_func, format_line, n=3): - """Print top N functions sorted by key_func with formatted output.""" - print(f"\n{ANSIColors.BOLD_BLUE}{title}:{ANSIColors.RESET}") - sorted_stats = sorted(stats_list, key=key_func, reverse=True) - for stat in sorted_stats[:n]: - if line := format_line(stat): - print(f" {line}") - - # Print summary of interesting functions if enabled - if show_summary and stats_list: - print( - f"\n{ANSIColors.BOLD_BLUE}Summary of Interesting Functions:{ANSIColors.RESET}" - ) - - # Aggregate stats by fully qualified function name (ignoring line numbers) - func_aggregated = {} - for ( - func, - direct_calls, - cumulative_calls, - total_time, - cumulative_time, - callers, - ) in stats_list: - # Use filename:function_name as the key to get fully qualified name - qualified_name = f"{func[0]}:{func[2]}" - if qualified_name not in func_aggregated: - func_aggregated[qualified_name] = [ - 0, - 0, - 0, - 0, - ] # direct_calls, cumulative_calls, total_time, cumulative_time - func_aggregated[qualified_name][0] += direct_calls - func_aggregated[qualified_name][1] += cumulative_calls - func_aggregated[qualified_name][2] += total_time - func_aggregated[qualified_name][3] += cumulative_time - - # Convert aggregated data back to list format for processing - aggregated_stats = [] - for qualified_name, ( - prim_calls, - total_calls, - total_time, - cumulative_time, - ) in func_aggregated.items(): - # Parse the qualified name back to filename and function name - if ":" in qualified_name: - filename, func_name = qualified_name.rsplit(":", 1) - else: - filename, func_name = "", qualified_name - # Create a dummy func tuple with filename and function name for display - dummy_func = (filename, "", func_name) - aggregated_stats.append( - ( - dummy_func, - prim_calls, - total_calls, - total_time, - cumulative_time, - {}, - ) - ) - - # Determine best units for summary metrics - max_total_time = max( - (total_time for _, _, _, total_time, _, _ in aggregated_stats), - default=0, - ) - max_cumulative_time = max( - ( - cumulative_time - for _, _, _, _, cumulative_time, _ in aggregated_stats - ), - default=0, - ) - - total_unit, total_scale = _determine_best_unit(max_total_time) - cumulative_unit, cumulative_scale = _determine_best_unit( - max_cumulative_time - ) - - # Functions with highest direct/cumulative ratio (hot spots) - def format_hotspots(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > 0: - ratio = direct_calls / cumulative_calls - direct_pct = ( - (direct_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{ratio:.3f} direct/cumulative ratio, " - f"{direct_pct:.1f}% direct samples: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Direct/Cumulative Ratio (Hot Spots)", - key_func=lambda x: (x[1] / x[2]) if x[2] > 0 else 0, - format_line=format_hotspots, - ) - - # Functions with highest call frequency (cumulative/direct difference) - def format_call_frequency(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if cumulative_calls > direct_calls: - call_frequency = cumulative_calls - direct_calls - cum_pct = ( - (cumulative_calls / total_samples * 100) - if total_samples > 0 - else 0 - ) - return ( - f"{call_frequency:d} indirect calls, " - f"{cum_pct:.1f}% total stack presence: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Frequency (Indirect Calls)", - key_func=lambda x: x[2] - x[1], # Sort by (cumulative - direct) - format_line=format_call_frequency, - ) - - # Functions with highest cumulative-to-direct multiplier (call magnification) - def format_call_magnification(stat): - func, direct_calls, cumulative_calls, total_time, _, _ = stat - if direct_calls > 0 and cumulative_calls > direct_calls: - multiplier = cumulative_calls / direct_calls - indirect_calls = cumulative_calls - direct_calls - return ( - f"{multiplier:.1f}x call magnification, " - f"{indirect_calls:d} indirect calls from {direct_calls:d} direct: {_format_func_name(func)}" - ) - return None - - _print_top_functions( - aggregated_stats, - "Functions with Highest Call Magnification (Cumulative/Direct)", - key_func=lambda x: (x[2] / x[1]) - if x[1] > 0 - else 0, # Sort by cumulative/direct ratio - format_line=format_call_magnification, - ) - - def sample( pid, + collector, *, - sort=2, - sample_interval_usec=100, duration_sec=10, - filename=None, all_threads=False, - limit=None, - show_summary=True, - output_format="pstats", realtime_stats=False, mode=PROFILING_MODE_WALL, + native=False, + gc=True, ): + """Sample a process using the provided collector. + + Args: + pid: Process ID to sample + collector: Collector instance to use for gathering samples + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec + + # PROFILING_MODE_ALL implies no skipping at all + if mode == PROFILING_MODE_ALL: + skip_non_matching_threads = False + else: + # For most modes, skip non-matching threads + # Gecko collector overrides this by setting skip_idle=False + skip_non_matching_threads = True + profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, mode=mode + pid, + sample_interval_usec, + all_threads=all_threads, + mode=mode, + native=native, + gc=gc, + skip_non_matching_threads=skip_non_matching_threads ) profiler.realtime_stats = realtime_stats - # Determine skip_idle for collector compatibility - skip_idle = mode != PROFILING_MODE_WALL - - collector = None - match output_format: - case "pstats": - collector = PstatsCollector(sample_interval_usec, skip_idle=skip_idle) - case "collapsed": - collector = CollapsedStackCollector(skip_idle=skip_idle) - filename = filename or f"collapsed.{pid}.txt" - case "flamegraph": - collector = FlamegraphCollector(skip_idle=skip_idle) - filename = filename or f"flamegraph.{pid}.html" - case "gecko": - collector = GeckoCollector(skip_idle=skip_idle) - filename = filename or f"gecko.{pid}.json" - case _: - raise ValueError(f"Invalid output format: {output_format}") - + # Run the sampling profiler.sample(collector, duration_sec) - if output_format == "pstats" and not filename: - stats = pstats.SampledStats(collector).strip_dirs() - if not stats.stats: - print("No samples were collected.") - if mode == PROFILING_MODE_CPU: - print("This can happen in CPU mode when all threads are idle.") - else: - print_sampled_stats( - stats, sort, limit, show_summary, sample_interval_usec - ) + return collector + + +def sample_live( + pid, + collector, + *, + duration_sec=10, + all_threads=False, + realtime_stats=False, + mode=PROFILING_MODE_WALL, + native=False, + gc=True, +): + """Sample a process in live/interactive mode with curses TUI. + + Args: + pid: Process ID to sample + collector: LiveStatsCollector instance + duration_sec: How long to sample for (seconds) + all_threads: Whether to sample all threads + realtime_stats: Whether to print real-time sampling statistics + mode: Profiling mode - WALL (all samples), CPU (only when on CPU), + GIL (only when holding GIL), ALL (includes GIL and CPU status) + native: Whether to include native frames + gc: Whether to include GC frames + + Returns: + The collector with collected samples + """ + import curses + + # Get sample interval from collector + sample_interval_usec = collector.sample_interval_usec + + # PROFILING_MODE_ALL implies no skipping at all + if mode == PROFILING_MODE_ALL: + skip_non_matching_threads = False else: - collector.export(filename) + skip_non_matching_threads = True - -def _validate_collapsed_format_args(args, parser): - # Check for incompatible pstats options - invalid_opts = [] - - # Get list of pstats-specific options - pstats_options = {"sort": None, "limit": None, "no_summary": False} - - # Find the default values from the argument definitions - for action in parser._actions: - if action.dest in pstats_options and hasattr(action, "default"): - pstats_options[action.dest] = action.default - - # Check if any pstats-specific options were provided by comparing with defaults - for opt, default in pstats_options.items(): - if getattr(args, opt) != default: - invalid_opts.append(opt.replace("no_", "")) - - if invalid_opts: - parser.error( - f"The following options are only valid with --pstats format: {', '.join(invalid_opts)}" - ) - - # Set default output filename for collapsed format only if we have a PID - # For module/script execution, this will be set later with the subprocess PID - if not args.outfile and args.pid is not None: - args.outfile = f"collapsed.{args.pid}.txt" - - -def wait_for_process_and_sample(pid, sort_value, args): - """Sample the process immediately since it has already signaled readiness.""" - # Set default filename with subprocess PID if not already set - filename = args.outfile - if not filename: - if args.format == "collapsed": - filename = f"collapsed.{pid}.txt" - elif args.format == "gecko": - filename = f"gecko.{pid}.json" - - mode = _parse_mode(args.mode) - - sample( + profiler = SampleProfiler( pid, - sort=sort_value, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=filename, - all_threads=args.all_threads, - limit=args.limit, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, + sample_interval_usec, + all_threads=all_threads, mode=mode, + native=native, + gc=gc, + skip_non_matching_threads=skip_non_matching_threads ) + profiler.realtime_stats = realtime_stats - -def main(): - # Create the main parser - parser = argparse.ArgumentParser( - description=_HELP_DESCRIPTION, - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - - # Target selection - target_group = parser.add_mutually_exclusive_group(required=False) - target_group.add_argument( - "-p", "--pid", type=int, help="Process ID to sample" - ) - target_group.add_argument( - "-m", "--module", - help="Run and profile a module as python -m module [ARGS...]" - ) - parser.add_argument( - "args", - nargs=argparse.REMAINDER, - help="Script to run and profile, with optional arguments" - ) - - # Sampling options - sampling_group = parser.add_argument_group("Sampling configuration") - sampling_group.add_argument( - "-i", - "--interval", - type=int, - default=100, - help="Sampling interval in microseconds (default: 100)", - ) - sampling_group.add_argument( - "-d", - "--duration", - type=int, - default=10, - help="Sampling duration in seconds (default: 10)", - ) - sampling_group.add_argument( - "-a", - "--all-threads", - action="store_true", - help="Sample all threads in the process instead of just the main thread", - ) - sampling_group.add_argument( - "--realtime-stats", - action="store_true", - default=False, - help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", - ) - - # Mode options - mode_group = parser.add_argument_group("Mode options") - mode_group.add_argument( - "--mode", - choices=["wall", "cpu", "gil"], - default="wall", - help="Sampling mode: wall (all threads), cpu (only CPU-running threads), gil (only GIL-holding threads) (default: wall)", - ) - - # Output format selection - output_group = parser.add_argument_group("Output options") - output_format = output_group.add_mutually_exclusive_group() - output_format.add_argument( - "--pstats", - action="store_const", - const="pstats", - dest="format", - default="pstats", - help="Generate pstats output (default)", - ) - output_format.add_argument( - "--collapsed", - action="store_const", - const="collapsed", - dest="format", - help="Generate collapsed stack traces for flamegraphs", - ) - output_format.add_argument( - "--flamegraph", - action="store_const", - const="flamegraph", - dest="format", - help="Generate HTML flamegraph visualization", - ) - output_format.add_argument( - "--gecko", - action="store_const", - const="gecko", - dest="format", - help="Generate Gecko format for Firefox Profiler", - ) - - output_group.add_argument( - "-o", - "--outfile", - help="Save output to a file (if omitted, prints to stdout for pstats, " - "or saves to collapsed..txt or flamegraph..html for the " - "respective output formats)" - ) - - # pstats-specific options - pstats_group = parser.add_argument_group("pstats format options") - sort_group = pstats_group.add_mutually_exclusive_group() - sort_group.add_argument( - "--sort-nsamples", - action="store_const", - const=0, - dest="sort", - help="Sort by number of direct samples (nsamples column)", - ) - sort_group.add_argument( - "--sort-tottime", - action="store_const", - const=1, - dest="sort", - help="Sort by total time (tottime column)", - ) - sort_group.add_argument( - "--sort-cumtime", - action="store_const", - const=2, - dest="sort", - help="Sort by cumulative time (cumtime column, default)", - ) - sort_group.add_argument( - "--sort-sample-pct", - action="store_const", - const=3, - dest="sort", - help="Sort by sample percentage (sample%% column)", - ) - sort_group.add_argument( - "--sort-cumul-pct", - action="store_const", - const=4, - dest="sort", - help="Sort by cumulative sample percentage (cumul%% column)", - ) - sort_group.add_argument( - "--sort-nsamples-cumul", - action="store_const", - const=5, - dest="sort", - help="Sort by cumulative samples (nsamples column, cumulative part)", - ) - sort_group.add_argument( - "--sort-name", - action="store_const", - const=-1, - dest="sort", - help="Sort by function name", - ) - - pstats_group.add_argument( - "-l", - "--limit", - type=int, - help="Limit the number of rows in the output", - default=15, - ) - pstats_group.add_argument( - "--no-summary", - action="store_true", - help="Disable the summary section in the output", - ) - - args = parser.parse_args() - - # Validate format-specific arguments - if args.format in ("collapsed", "gecko"): - _validate_collapsed_format_args(args, parser) - - sort_value = args.sort if args.sort is not None else 2 - - if args.module is not None and not args.module: - parser.error("argument -m/--module: expected one argument") - - # Validate that we have exactly one target type - # Note: args can be present with -m (module arguments) but not as standalone script - has_pid = args.pid is not None - has_module = args.module is not None - has_script = bool(args.args) and args.module is None - - target_count = sum([has_pid, has_module, has_script]) - - if target_count == 0: - parser.error("one of the arguments -p/--pid -m/--module or script name is required") - elif target_count > 1: - parser.error("only one target type can be specified: -p/--pid, -m/--module, or script") - - mode = _parse_mode(args.mode) - - if args.pid: - sample( - args.pid, - sample_interval_usec=args.interval, - duration_sec=args.duration, - filename=args.outfile, - all_threads=args.all_threads, - limit=args.limit, - sort=sort_value, - show_summary=not args.no_summary, - output_format=args.format, - realtime_stats=args.realtime_stats, - mode=mode, - ) - elif args.module or args.args: - if args.module: - cmd = (sys.executable, "-m", args.module, *args.args) - else: - cmd = (sys.executable, *args.args) - - # Use synchronized process startup - process = _run_with_sync(cmd) - - # Process has already signaled readiness, start sampling immediately + def curses_wrapper_func(stdscr): + collector.init_curses(stdscr) try: - wait_for_process_and_sample(process.pid, sort_value, args) + profiler.sample(collector, duration_sec) + # Mark as finished and keep the TUI running until user presses 'q' + collector.mark_finished() + # Keep processing input until user quits + while collector.running: + collector._handle_input() + time.sleep(0.05) # Small sleep to avoid busy waiting finally: - if process.poll() is None: - process.terminate() - try: - process.wait(timeout=2) - except subprocess.TimeoutExpired: - process.kill() - process.wait() + collector.cleanup_curses() -if __name__ == "__main__": - main() + try: + curses.wrapper(curses_wrapper_func) + except KeyboardInterrupt: + pass + + return collector diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index bc38151e067..e2653609313 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -6,12 +6,14 @@ import linecache import os +from ._css_utils import get_combined_css from .collector import Collector from .string_table import StringTable class StackTraceCollector(Collector): - def __init__(self, *, skip_idle=False): + def __init__(self, sample_interval_usec, *, skip_idle=False): + self.sample_interval_usec = sample_interval_usec self.skip_idle = skip_idle def collect(self, stack_frames, skip_idle=False): @@ -36,10 +38,16 @@ def process_frames(self, frames, thread_id): def export(self, filename): lines = [] for (call_tree, thread_id), count in self.stack_counter.items(): - stack_str = ";".join( - f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree - ) - lines.append((f"tid:{thread_id};{stack_str}", count)) + parts = [f"tid:{thread_id}"] + for file, line, func in call_tree: + # This is what pstats does for "special" frames: + if file == "~" and line == 0: + part = func + else: + part = f"{os.path.basename(file)}:{func}:{line}" + parts.append(part) + stack_str = ";".join(parts) + lines.append((stack_str, count)) lines.sort(key=lambda x: (-x[1], x[0])) @@ -55,17 +63,67 @@ def __init__(self, *args, **kwargs): self.stats = {} self._root = {"samples": 0, "children": {}, "threads": set()} self._total_samples = 0 + self._sample_count = 0 # Track actual number of samples (not thread traces) self._func_intern = {} self._string_table = StringTable() self._all_threads = set() - def set_stats(self, sample_interval_usec, duration_sec, sample_rate, error_rate=None): + # Thread status statistics (similar to LiveStatsCollector) + self.thread_status_counts = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + } + self.samples_with_gc_frames = 0 + + # Per-thread statistics + self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, total, gc_samples}} + + def collect(self, stack_frames, skip_idle=False): + """Override to track thread status statistics before processing frames.""" + # Increment sample count once per sample + self._sample_count += 1 + + # Collect both aggregate and per-thread statistics using base method + status_counts, has_gc_frame, per_thread_stats = self._collect_thread_status_stats(stack_frames) + + # Merge aggregate status counts + for key in status_counts: + self.thread_status_counts[key] += status_counts[key] + + # Update aggregate GC frame count + if has_gc_frame: + self.samples_with_gc_frames += 1 + + # Merge per-thread statistics + for thread_id, stats in per_thread_stats.items(): + if thread_id not in self.per_thread_stats: + self.per_thread_stats[thread_id] = { + "has_gil": 0, + "on_cpu": 0, + "gil_requested": 0, + "unknown": 0, + "total": 0, + "gc_samples": 0, + } + for key, value in stats.items(): + self.per_thread_stats[thread_id][key] += value + + # Call parent collect to process frames + super().collect(stack_frames, skip_idle=skip_idle) + + def set_stats(self, sample_interval_usec, duration_sec, sample_rate, + error_rate=None, missed_samples=None, mode=None): """Set profiling statistics to include in flamegraph data.""" self.stats = { "sample_interval_usec": sample_interval_usec, "duration_sec": duration_sec, "sample_rate": sample_rate, - "error_rate": error_rate + "error_rate": error_rate, + "missed_samples": missed_samples, + "mode": mode } def export(self, filename): @@ -98,6 +156,10 @@ def export(self, filename): def _format_function_name(func): filename, lineno, funcname = func + # Special frames like and should not show file:line + if filename == "~" and lineno == 0: + return funcname + if len(filename) > 50: parts = filename.split("/") if len(parts) > 2: @@ -106,7 +168,6 @@ def _format_function_name(func): return f"{funcname} ({filename}:{lineno})" def _convert_to_flamegraph_format(self): - """Convert aggregated trie to d3-flamegraph format with string table optimization.""" if self._total_samples == 0: return { "name": self._string_table.intern("No Data"), @@ -167,6 +228,29 @@ def convert_children(children, min_samples): "strings": self._string_table.get_strings() } + # Calculate thread status percentages for display + total_threads = max(1, self.thread_status_counts["total"]) + thread_stats = { + "has_gil_pct": (self.thread_status_counts["has_gil"] / total_threads) * 100, + "on_cpu_pct": (self.thread_status_counts["on_cpu"] / total_threads) * 100, + "gil_requested_pct": (self.thread_status_counts["gil_requested"] / total_threads) * 100, + "gc_pct": (self.samples_with_gc_frames / max(1, self._sample_count)) * 100, + **self.thread_status_counts + } + + # Calculate per-thread statistics with percentages + per_thread_stats_with_pct = {} + total_samples_denominator = max(1, self._sample_count) + for thread_id, stats in self.per_thread_stats.items(): + total = max(1, stats["total"]) + per_thread_stats_with_pct[thread_id] = { + "has_gil_pct": (stats["has_gil"] / total) * 100, + "on_cpu_pct": (stats["on_cpu"] / total) * 100, + "gil_requested_pct": (stats["gil_requested"] / total) * 100, + "gc_pct": (stats["gc_samples"] / total_samples_denominator) * 100, + **stats + } + # If we only have one root child, make it the root to avoid redundant level if len(root_children) == 1: main_child = root_children[0] @@ -174,7 +258,11 @@ def convert_children(children, min_samples): old_name = self._string_table.get_string(main_child["name"]) new_name = f"Program Root: {old_name}" main_child["name"] = self._string_table.intern(new_name) - main_child["stats"] = self.stats + main_child["stats"] = { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + } main_child["threads"] = sorted(list(self._all_threads)) main_child["strings"] = self._string_table.get_strings() return main_child @@ -183,7 +271,11 @@ def convert_children(children, min_samples): "name": self._string_table.intern("Program Root"), "value": total_samples, "children": root_children, - "stats": self.stats, + "stats": { + **self.stats, + "thread_stats": thread_stats, + "per_thread_stats": per_thread_stats_with_pct + }, "threads": sorted(list(self._all_threads)), "strings": self._string_table.get_strings() } @@ -240,9 +332,9 @@ def _create_flamegraph_html(self, data): fg_js_path = d3_flame_graph_dir / "d3-flamegraph.min.js" fg_tooltip_js_path = d3_flame_graph_dir / "d3-flamegraph-tooltip.min.js" - html_template = (template_dir / "flamegraph_template.html").read_text(encoding="utf-8") - css_content = (template_dir / "flamegraph.css").read_text(encoding="utf-8") - js_content = (template_dir / "flamegraph.js").read_text(encoding="utf-8") + html_template = (template_dir / "_flamegraph_assets" / "flamegraph_template.html").read_text(encoding="utf-8") + css_content = get_combined_css("flamegraph") + js_content = (template_dir / "_flamegraph_assets" / "flamegraph.js").read_text(encoding="utf-8") # Inline first-party CSS/JS html_template = html_template.replace( diff --git a/Lib/profiling/tracing/__init__.py b/Lib/profiling/tracing/__init__.py index 2dc7ea92c8c..a6b8edf7216 100644 --- a/Lib/profiling/tracing/__init__.py +++ b/Lib/profiling/tracing/__init__.py @@ -185,7 +185,7 @@ def main(): progname = args[0] sys.path.insert(0, os.path.dirname(progname)) with io.open_code(progname) as fp: - code = compile(fp.read(), progname, 'exec') + code = compile(fp.read(), progname, 'exec', module='__main__') spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, origin=progname) module = importlib.util.module_from_spec(spec) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 989fbd517d8..45ff5fca308 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -108,96 +108,10 @@ def pathdirs(): normdirs.append(normdir) return dirs -def _findclass(func): - cls = sys.modules.get(func.__module__) - if cls is None: - return None - for name in func.__qualname__.split('.')[:-1]: - cls = getattr(cls, name) - if not inspect.isclass(cls): - return None - return cls - -def _finddoc(obj): - if inspect.ismethod(obj): - name = obj.__func__.__name__ - self = obj.__self__ - if (inspect.isclass(self) and - getattr(getattr(self, name, None), '__func__') is obj.__func__): - # classmethod - cls = self - else: - cls = self.__class__ - elif inspect.isfunction(obj): - name = obj.__name__ - cls = _findclass(obj) - if cls is None or getattr(cls, name) is not obj: - return None - elif inspect.isbuiltin(obj): - name = obj.__name__ - self = obj.__self__ - if (inspect.isclass(self) and - self.__qualname__ + '.' + name == obj.__qualname__): - # classmethod - cls = self - else: - cls = self.__class__ - # Should be tested before isdatadescriptor(). - elif isinstance(obj, property): - name = obj.__name__ - cls = _findclass(obj.fget) - if cls is None or getattr(cls, name) is not obj: - return None - elif inspect.ismethoddescriptor(obj) or inspect.isdatadescriptor(obj): - name = obj.__name__ - cls = obj.__objclass__ - if getattr(cls, name) is not obj: - return None - if inspect.ismemberdescriptor(obj): - slots = getattr(cls, '__slots__', None) - if isinstance(slots, dict) and name in slots: - return slots[name] - else: - return None - for base in cls.__mro__: - try: - doc = _getowndoc(getattr(base, name)) - except AttributeError: - continue - if doc is not None: - return doc - return None - -def _getowndoc(obj): - """Get the documentation string for an object if it is not - inherited from its class.""" - try: - doc = object.__getattribute__(obj, '__doc__') - if doc is None: - return None - if obj is not type: - typedoc = type(obj).__doc__ - if isinstance(typedoc, str) and typedoc == doc: - return None - return doc - except AttributeError: - return None - def _getdoc(object): - """Get the documentation string for an object. - - All tabs are expanded to spaces. To clean up docstrings that are - indented to line up with blocks of code, any whitespace than can be - uniformly removed from the second line onwards is removed.""" - doc = _getowndoc(object) - if doc is None: - try: - doc = _finddoc(object) - except (AttributeError, TypeError): - return None - if not isinstance(doc, str): - return None - return inspect.cleandoc(doc) + return inspect.getdoc(object, + fallback_to_class_doc=False, + inherit_class_doc=False) def getdoc(object): """Get the doc string or comments for an object.""" diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 293c3189589..11ffc6bf3a1 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Tue Oct 14 13:46:01 2025 +# Autogenerated by Sphinx on Tue Nov 18 16:51:09 2025 # as part of the release process. topics = { @@ -1098,10 +1098,10 @@ class and instance attributes applies as for regular assignments. 'bltin-ellipsis-object': r'''The Ellipsis Object ******************* -This object is commonly used used to indicate that something is -omitted. It supports no special operations. There is exactly one -ellipsis object, named "Ellipsis" (a built-in name). -"type(Ellipsis)()" produces the "Ellipsis" singleton. +This object is commonly used to indicate that something is omitted. It +supports no special operations. There is exactly one ellipsis object, +named "Ellipsis" (a built-in name). "type(Ellipsis)()" produces the +"Ellipsis" singleton. It is written as "Ellipsis" or "...". @@ -4140,6 +4140,10 @@ def double(x): available for commands and command arguments, e.g. the current global and local names are offered as arguments of the "p" command. + +Command-line interface +====================== + You can also invoke "pdb" from the command line to debug other scripts. For example: @@ -4155,7 +4159,7 @@ def double(x): -c, --command To execute commands as if given in a ".pdbrc" file; see Debugger - Commands. + commands. Changed in version 3.2: Added the "-c" option. @@ -4376,7 +4380,7 @@ class pdb.Pdb(completekey='tab', stdin=None, stdout=None, skip=None, nosigint=Fa See the documentation for the functions explained above. -Debugger Commands +Debugger commands ================= The commands recognized by the debugger are listed below. Most @@ -5616,9 +5620,8 @@ class of the instance or a *non-virtual base class* thereof. The 2.71828 4.0 -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". +Unlike in integer literals, leading zeros are allowed. For example, +"077.010" is legal, and denotes the same number as "77.01". As in integer literals, single underscores may occur between digits to help readability: @@ -7435,9 +7438,8 @@ class body. A "SyntaxError" is raised if a variable is used or 2.71828 4.0 -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". +Unlike in integer literals, leading zeros are allowed. For example, +"077.010" is legal, and denotes the same number as "77.01". As in integer literals, single underscores may occur between digits to help readability: @@ -7685,9 +7687,8 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is ************************* *Objects* are Python’s abstraction for data. All data in a Python -program is represented by objects or by relations between objects. (In -a sense, and in conformance to Von Neumann’s model of a “stored -program computer”, code is also represented by objects.) +program is represented by objects or by relations between objects. +Even code is represented by objects. Every object has an identity, a type and a value. An object’s *identity* never changes once it has been created; you may think of it @@ -10301,6 +10302,17 @@ class is used in a class pattern with positional arguments, each follow uncased characters and lowercase characters only cased ones. Return "False" otherwise. + For example: + + >>> 'Spam, Spam, Spam'.istitle() + True + >>> 'spam, spam, spam'.istitle() + False + >>> 'SPAM, SPAM, SPAM'.istitle() + False + + See also "title()". + str.isupper() Return "True" if all cased characters [4] in the string are @@ -10663,6 +10675,8 @@ class is used in a class pattern with positional arguments, each >>> titlecase("they're bill's friends.") "They're Bill's Friends." + See also "istitle()". + str.translate(table, /) Return a copy of the string in which each character has been mapped @@ -12362,6 +12376,11 @@ class method object, it is transformed into an instance method object | | "X.__bases__" will be exactly equal to "(A, B, | | | C)". | +----------------------------------------------------+----------------------------------------------------+ +| type.__base__ | **CPython implementation detail:** The single base | +| | class in the inheritance chain that is responsible | +| | for the memory layout of instances. This attribute | +| | corresponds to "tp_base" at the C level. | ++----------------------------------------------------+----------------------------------------------------+ | type.__doc__ | The class’s documentation string, or "None" if | | | undefined. Not inherited by subclasses. | +----------------------------------------------------+----------------------------------------------------+ diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index a5316391297..ecec16e9005 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -397,9 +397,12 @@ def __init__(self, lexicon, flags=0): s = _parser.State() s.flags = flags for phrase, action in lexicon: + sub_pattern = _parser.parse(phrase, flags) + if sub_pattern.state.groups != 1: + raise ValueError("Cannot use capturing groups in re.Scanner") gid = s.opengroup() p.append(_parser.SubPattern(s, [ - (SUBPATTERN, (gid, 0, 0, _parser.parse(phrase, flags))), + (SUBPATTERN, (gid, 0, 0, sub_pattern)), ])) s.closegroup(gid, p[-1]) p = _parser.SubPattern(s, [(BRANCH, (None, p))]) diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py index 20dd561d1c1..c2ca8e25abe 100644 --- a/Lib/re/_compiler.py +++ b/Lib/re/_compiler.py @@ -375,7 +375,7 @@ def _optimize_charset(charset, iscased=None, fixup=None, fixes=None): # less significant byte is a bit index in the chunk (just like the # CHARSET matching). - charmap = bytes(charmap) # should be hashable + charmap = charmap.take_bytes() # should be hashable comps = {} mapping = bytearray(256) block = 0 diff --git a/Lib/runpy.py b/Lib/runpy.py index ef54d3282ee..f072498f6cb 100644 --- a/Lib/runpy.py +++ b/Lib/runpy.py @@ -247,7 +247,7 @@ def _get_main_module_details(error=ImportError): sys.modules[main_name] = saved_main -def _get_code_from_file(fname): +def _get_code_from_file(fname, module): # Check for a compiled file first from pkgutil import read_code code_path = os.path.abspath(fname) @@ -256,7 +256,7 @@ def _get_code_from_file(fname): if code is None: # That didn't work, so try it as normal source code with io.open_code(code_path) as f: - code = compile(f.read(), fname, 'exec') + code = compile(f.read(), fname, 'exec', module=module) return code def run_path(path_name, init_globals=None, run_name=None): @@ -283,7 +283,7 @@ def run_path(path_name, init_globals=None, run_name=None): if isinstance(importer, type(None)): # Not a valid sys.path entry, so run the code directly # execfile() doesn't help as we want to allow compiled files - code = _get_code_from_file(path_name) + code = _get_code_from_file(path_name, run_name) return _run_module_code(code, init_globals, run_name, pkg_name=pkg_name, script_name=path_name) else: diff --git a/Lib/shelve.py b/Lib/shelve.py index 1010be1e09d..9f6296667fd 100644 --- a/Lib/shelve.py +++ b/Lib/shelve.py @@ -57,7 +57,6 @@ """ from pickle import DEFAULT_PROTOCOL, dumps, loads -from io import BytesIO import collections.abc diff --git a/Lib/smtplib.py b/Lib/smtplib.py index 808f0fd47e8..72093f7f8b0 100644 --- a/Lib/smtplib.py +++ b/Lib/smtplib.py @@ -917,7 +917,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, The arguments are as for sendmail, except that msg is an email.message.Message object. If from_addr is None or to_addrs is None, these arguments are taken from the headers of the Message as - described in RFC 2822 (a ValueError is raised if there is more than + described in RFC 5322 (a ValueError is raised if there is more than one set of 'Resent-' headers). Regardless of the values of from_addr and to_addr, any Bcc field (or Resent-Bcc field, when the Message is a resent) of the Message object won't be transmitted. The Message @@ -931,7 +931,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, policy. """ - # 'Resent-Date' is a mandatory field if the Message is resent (RFC 2822 + # 'Resent-Date' is a mandatory field if the Message is resent (RFC 5322 # Section 3.6.6). In such a case, we use the 'Resent-*' fields. However, # if there is more than one 'Resent-' block there's no way to # unambiguously determine which one is the most recent in all cases, @@ -950,7 +950,7 @@ def send_message(self, msg, from_addr=None, to_addrs=None, else: raise ValueError("message has more than one 'Resent-' header block") if from_addr is None: - # Prefer the sender field per RFC 2822:3.6.2. + # Prefer the sender field per RFC 5322 section 3.6.2. from_addr = (msg[header_prefix + 'Sender'] if (header_prefix + 'Sender') in msg else msg[header_prefix + 'From']) diff --git a/Lib/ssl.py b/Lib/ssl.py index 7ad7969a821..67a2990b281 100644 --- a/Lib/ssl.py +++ b/Lib/ssl.py @@ -185,7 +185,7 @@ class _TLSContentType: class _TLSAlertType: """Alert types for TLSContentType.ALERT messages - See RFC 8466, section B.2 + See RFC 8446, section B.2 """ CLOSE_NOTIFY = 0 UNEXPECTED_MESSAGE = 10 diff --git a/Lib/statistics.py b/Lib/statistics.py index 3d805cb0739..26cf925529e 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -619,9 +619,14 @@ def stdev(data, xbar=None): if n < 2: raise StatisticsError('stdev requires at least two data points') mss = ss / (n - 1) + try: + mss_numerator = mss.numerator + mss_denominator = mss.denominator + except AttributeError: + raise ValueError('inf or nan encountered in data') if issubclass(T, Decimal): - return _decimal_sqrt_of_frac(mss.numerator, mss.denominator) - return _float_sqrt_of_frac(mss.numerator, mss.denominator) + return _decimal_sqrt_of_frac(mss_numerator, mss_denominator) + return _float_sqrt_of_frac(mss_numerator, mss_denominator) def pstdev(data, mu=None): @@ -637,9 +642,14 @@ def pstdev(data, mu=None): if n < 1: raise StatisticsError('pstdev requires at least one data point') mss = ss / n + try: + mss_numerator = mss.numerator + mss_denominator = mss.denominator + except AttributeError: + raise ValueError('inf or nan encountered in data') if issubclass(T, Decimal): - return _decimal_sqrt_of_frac(mss.numerator, mss.denominator) - return _float_sqrt_of_frac(mss.numerator, mss.denominator) + return _decimal_sqrt_of_frac(mss_numerator, mss_denominator) + return _float_sqrt_of_frac(mss_numerator, mss_denominator) ## Statistics for relations between two inputs ############################# diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 79251bd5310..4d5ab6fbff0 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -1613,6 +1613,10 @@ def _readerthread(self, fh, buffer): fh.close() + def _writerthread(self, input): + self._stdin_write(input) + + def _communicate(self, input, endtime, orig_timeout): # Start reader threads feeding into a list hanging off of this # object, unless they've already been started. @@ -1631,8 +1635,23 @@ def _communicate(self, input, endtime, orig_timeout): self.stderr_thread.daemon = True self.stderr_thread.start() - if self.stdin: - self._stdin_write(input) + # Start writer thread to send input to stdin, unless already + # started. The thread writes input and closes stdin when done, + # or continues in the background on timeout. + if self.stdin and not hasattr(self, "_stdin_thread"): + self._stdin_thread = \ + threading.Thread(target=self._writerthread, + args=(input,)) + self._stdin_thread.daemon = True + self._stdin_thread.start() + + # Wait for the writer thread, or time out. If we time out, the + # thread remains writing and the fd left open in case the user + # calls communicate again. + if hasattr(self, "_stdin_thread"): + self._stdin_thread.join(self._remaining_time(endtime)) + if self._stdin_thread.is_alive(): + raise TimeoutExpired(self.args, orig_timeout) # Wait for the reader threads, or time out. If we time out, the # threads remain reading and the fds left open in case the user @@ -2077,6 +2096,10 @@ def _communicate(self, input, endtime, orig_timeout): self.stdin.flush() except BrokenPipeError: pass # communicate() must ignore BrokenPipeError. + except ValueError: + # ignore ValueError: I/O operation on closed file. + if not self.stdin.closed: + raise if not input: try: self.stdin.close() @@ -2102,10 +2125,13 @@ def _communicate(self, input, endtime, orig_timeout): self._save_input(input) if self._input: - input_view = memoryview(self._input) + if not isinstance(self._input, memoryview): + input_view = memoryview(self._input) + else: + input_view = self._input.cast("b") # byte input required with _PopenSelector() as selector: - if self.stdin and input: + if self.stdin and not self.stdin.closed and self._input: selector.register(self.stdin, selectors.EVENT_WRITE) if self.stdout and not self.stdout.closed: selector.register(self.stdout, selectors.EVENT_READ) @@ -2138,7 +2164,7 @@ def _communicate(self, input, endtime, orig_timeout): selector.unregister(key.fileobj) key.fileobj.close() else: - if self._input_offset >= len(self._input): + if self._input_offset >= len(input_view): selector.unregister(key.fileobj) key.fileobj.close() elif key.fileobj in (self.stdout, self.stderr): diff --git a/Lib/symtable.py b/Lib/symtable.py index 77475c3ffd9..4c832e68f94 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -17,13 +17,13 @@ __all__ = ["symtable", "SymbolTableType", "SymbolTable", "Class", "Function", "Symbol"] -def symtable(code, filename, compile_type): +def symtable(code, filename, compile_type, *, module=None): """ Return the toplevel *SymbolTable* for the source code. *filename* is the name of the file with the code and *compile_type* is the *compile()* mode argument. """ - top = _symtable.symtable(code, filename, compile_type) + top = _symtable.symtable(code, filename, compile_type, module=module) return _newSymbolTable(top, filename) class SymbolTableFactory: diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 850744e47d0..d03eb1dfb25 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -39,6 +39,7 @@ from test.support import socket_helper from test.support import threading_helper from test.support import warnings_helper +from test.support import subTests from test.support.script_helper import assert_python_failure, assert_python_ok # Skip tests if _multiprocessing wasn't built. @@ -1204,7 +1205,7 @@ def test_put(self): @classmethod def _test_get(cls, queue, child_can_start, parent_can_continue): child_can_start.wait() - #queue.put(1) + queue.put(1) queue.put(2) queue.put(3) queue.put(4) @@ -1229,15 +1230,16 @@ def test_get(self): child_can_start.set() parent_can_continue.wait() - time.sleep(DELTA) + for _ in support.sleeping_retry(support.SHORT_TIMEOUT): + if not queue_empty(queue): + break self.assertEqual(queue_empty(queue), False) - # Hangs unexpectedly, remove for now - #self.assertEqual(queue.get(), 1) + self.assertEqual(queue.get_nowait(), 1) self.assertEqual(queue.get(True, None), 2) self.assertEqual(queue.get(True), 3) self.assertEqual(queue.get(timeout=1), 4) - self.assertEqual(queue.get_nowait(), 5) + self.assertEqual(queue.get(), 5) self.assertEqual(queue_empty(queue), True) @@ -4382,6 +4384,19 @@ def test_copy(self): self.assertEqual(bar.z, 2 ** 33) +def resource_tracker_format_subtests(func): + """Run given test using both resource tracker communication formats""" + def _inner(self, *args, **kwargs): + tracker = resource_tracker._resource_tracker + for use_simple_format in False, True: + with ( + self.subTest(use_simple_format=use_simple_format), + unittest.mock.patch.object( + tracker, '_use_simple_format', use_simple_format) + ): + func(self, *args, **kwargs) + return _inner + @unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") @hashlib_helper.requires_hashdigest('sha256') class _TestSharedMemory(BaseTestCase): @@ -4661,6 +4676,7 @@ def test_shared_memory_SharedMemoryServer_ignores_sigint(self): smm.shutdown() @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_SharedMemoryManager_reuses_resource_tracker(self): # bpo-36867: test that a SharedMemoryManager uses the # same resource_tracker process as its parent. @@ -4912,6 +4928,7 @@ def test_shared_memory_cleaned_after_process_termination(self): "shared_memory objects to clean up at shutdown", err) @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_untracking(self): # gh-82300: When a separate Python process accesses shared memory # with track=False, it must not cause the memory to be deleted @@ -4939,6 +4956,7 @@ def test_shared_memory_untracking(self): mem.close() @unittest.skipIf(os.name != "posix", "resource_tracker is posix only") + @resource_tracker_format_subtests def test_shared_memory_tracking(self): # gh-82300: When a separate Python process accesses shared memory # with track=True, it must cause the memory to be deleted when @@ -6956,28 +6974,13 @@ def test_std_streams_flushed_after_preload(self): if multiprocessing.get_start_method() != "forkserver": self.skipTest("forkserver specific test") - # Create a test module in the temporary directory on the child's path - # TODO: This can all be simplified once gh-126631 is fixed and we can - # use __main__ instead of a module. - dirname = os.path.join(self._temp_dir, 'preloaded_module') - init_name = os.path.join(dirname, '__init__.py') - os.mkdir(dirname) - with open(init_name, "w") as f: - cmd = '''if 1: - import sys - print('stderr', end='', file=sys.stderr) - print('stdout', end='', file=sys.stdout) - ''' - f.write(cmd) - name = os.path.join(os.path.dirname(__file__), 'mp_preload_flush.py') - env = {'PYTHONPATH': self._temp_dir} - _, out, err = test.support.script_helper.assert_python_ok(name, **env) + _, out, err = test.support.script_helper.assert_python_ok(name) # Check stderr first, as it is more likely to be useful to see in the # event of a failure. - self.assertEqual(err.decode().rstrip(), 'stderr') - self.assertEqual(out.decode().rstrip(), 'stdout') + self.assertEqual(err.decode().rstrip(), '__main____mp_main__') + self.assertEqual(out.decode().rstrip(), '__main____mp_main__') class MiscTestCase(unittest.TestCase): @@ -7364,3 +7367,51 @@ def test_forkpty(self): res = assert_python_failure("-c", code, PYTHONWARNINGS='error') self.assertIn(b'DeprecationWarning', res.err) self.assertIn(b'is multi-threaded, use of forkpty() may lead to deadlocks in the child', res.err) + +@unittest.skipUnless(HAS_SHMEM, "requires multiprocessing.shared_memory") +class TestSharedMemoryNames(unittest.TestCase): + @subTests('use_simple_format', (True, False)) + def test_that_shared_memory_name_with_colons_has_no_resource_tracker_errors( + self, use_simple_format): + # Test script that creates and cleans up shared memory with colon in name + test_script = textwrap.dedent(""" + import sys + from multiprocessing import shared_memory + from multiprocessing import resource_tracker + import time + + resource_tracker._resource_tracker._use_simple_format = %s + + # Test various patterns of colons in names + test_names = [ + "a:b", + "a:b:c", + "test:name:with:many:colons", + ":starts:with:colon", + "ends:with:colon:", + "::double::colons::", + "name\\nwithnewline", + "name-with-trailing-newline\\n", + "\\nname-starts-with-newline", + "colons:and\\nnewlines:mix", + "multi\\nline\\nname", + ] + + for name in test_names: + try: + shm = shared_memory.SharedMemory(create=True, size=100, name=name) + shm.buf[:5] = b'hello' # Write something to the shared memory + shm.close() + shm.unlink() + + except Exception as e: + print(f"Error with name '{name}': {e}", file=sys.stderr) + sys.exit(1) + + print("SUCCESS") + """ % use_simple_format) + + rc, out, err = assert_python_ok("-c", test_script) + self.assertIn(b"SUCCESS", out) + self.assertNotIn(b"traceback", err.lower(), err) + self.assertNotIn(b"resource_tracker.py", err, err) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 7df27206206..ace56aab7ac 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -6300,21 +6300,21 @@ def test_vilnius_1941_fromutc(self): gdt = datetime(1941, 6, 23, 20, 59, 59, tzinfo=timezone.utc) ldt = gdt.astimezone(Vilnius) - self.assertEqual(ldt.strftime("%c %Z%z"), + self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"), 'Mon Jun 23 23:59:59 1941 MSK+0300') self.assertEqual(ldt.fold, 0) self.assertFalse(ldt.dst()) gdt = datetime(1941, 6, 23, 21, tzinfo=timezone.utc) ldt = gdt.astimezone(Vilnius) - self.assertEqual(ldt.strftime("%c %Z%z"), + self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"), 'Mon Jun 23 23:00:00 1941 CEST+0200') self.assertEqual(ldt.fold, 1) self.assertTrue(ldt.dst()) gdt = datetime(1941, 6, 23, 22, tzinfo=timezone.utc) ldt = gdt.astimezone(Vilnius) - self.assertEqual(ldt.strftime("%c %Z%z"), + self.assertEqual(ldt.strftime("%a %b %d %H:%M:%S %Y %Z%z"), 'Tue Jun 24 00:00:00 1941 CEST+0200') self.assertEqual(ldt.fold, 0) self.assertTrue(ldt.dst()) @@ -6324,22 +6324,22 @@ def test_vilnius_1941_toutc(self): ldt = datetime(1941, 6, 23, 22, 59, 59, tzinfo=Vilnius) gdt = ldt.astimezone(timezone.utc) - self.assertEqual(gdt.strftime("%c %Z"), + self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"), 'Mon Jun 23 19:59:59 1941 UTC') ldt = datetime(1941, 6, 23, 23, 59, 59, tzinfo=Vilnius) gdt = ldt.astimezone(timezone.utc) - self.assertEqual(gdt.strftime("%c %Z"), + self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"), 'Mon Jun 23 20:59:59 1941 UTC') ldt = datetime(1941, 6, 23, 23, 59, 59, tzinfo=Vilnius, fold=1) gdt = ldt.astimezone(timezone.utc) - self.assertEqual(gdt.strftime("%c %Z"), + self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"), 'Mon Jun 23 21:59:59 1941 UTC') ldt = datetime(1941, 6, 24, 0, tzinfo=Vilnius) gdt = ldt.astimezone(timezone.utc) - self.assertEqual(gdt.strftime("%c %Z"), + self.assertEqual(gdt.strftime("%a %b %d %H:%M:%S %Y %Z"), 'Mon Jun 23 22:00:00 1941 UTC') def test_constructors(self): diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py index d94fb84a743..cfb009c203e 100644 --- a/Lib/test/libregrtest/utils.py +++ b/Lib/test/libregrtest/utils.py @@ -294,6 +294,25 @@ def clear_caches(): else: importlib_metadata.FastPath.__new__.cache_clear() + try: + encodings = sys.modules['encodings'] + except KeyError: + pass + else: + encodings._cache.clear() + + try: + codecs = sys.modules['codecs'] + except KeyError: + pass + else: + # There's no direct API to clear the codecs search cache, but + # `unregister` clears it implicitly. + def noop_search_function(name): + return None + codecs.register(noop_search_function) + codecs.unregister(noop_search_function) + def get_build_info(): # Get most important configure and build options as a list of strings. diff --git a/Lib/test/mp_preload_flush.py b/Lib/test/mp_preload_flush.py index 3501554d366..c195a9ef6b2 100644 --- a/Lib/test/mp_preload_flush.py +++ b/Lib/test/mp_preload_flush.py @@ -1,15 +1,11 @@ import multiprocessing import sys -modname = 'preloaded_module' +print(__name__, end='', file=sys.stderr) +print(__name__, end='', file=sys.stdout) if __name__ == '__main__': - if modname in sys.modules: - raise AssertionError(f'{modname!r} is not in sys.modules') multiprocessing.set_start_method('forkserver') - multiprocessing.set_forkserver_preload([modname]) for _ in range(2): p = multiprocessing.Process() p.start() p.join() -elif modname not in sys.modules: - raise AssertionError(f'{modname!r} is not in sys.modules') diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py index e3663e44546..4e3468bfcde 100644 --- a/Lib/test/pickletester.py +++ b/Lib/test/pickletester.py @@ -74,6 +74,15 @@ def count_opcode(code, pickle): def identity(x): return x +def itersize(start, stop): + # Produce geometrical increasing sequence from start to stop + # (inclusively) for tests. + size = start + while size < stop: + yield size + size <<= 1 + yield stop + class UnseekableIO(io.BytesIO): def peek(self, *args): @@ -853,9 +862,8 @@ def assert_is_copy(self, obj, objcopy, msg=None): self.assertEqual(getattr(obj, slot, None), getattr(objcopy, slot, None), msg=msg) - def check_unpickling_error(self, errors, data): - with self.subTest(data=data), \ - self.assertRaises(errors): + def check_unpickling_error_strict(self, errors, data): + with self.assertRaises(errors): try: self.loads(data) except BaseException as exc: @@ -864,6 +872,10 @@ def check_unpickling_error(self, errors, data): (data, exc.__class__.__name__, exc)) raise + def check_unpickling_error(self, errors, data): + with self.subTest(data=data): + self.check_unpickling_error_strict(errors, data) + def test_load_from_data0(self): self.assert_is_copy(self._testdata, self.loads(DATA0)) @@ -1150,6 +1162,155 @@ def test_negative_32b_binput(self): dumped = b'\x80\x03X\x01\x00\x00\x00ar\xff\xff\xff\xff.' self.check_unpickling_error(ValueError, dumped) + def test_too_large_put(self): + # Test that PUT with large id does not cause allocation of + # too large memo table. The C implementation uses a dict-based memo + # for sparse indices (when idx > memo_len * 2) instead of allocating + # a massive array. This test verifies large sparse indices work without + # causing memory exhaustion. + # + # The following simple pickle creates an empty list, memoizes it + # using a large index, then loads it back on the stack, builds + # a tuple containing 2 identical empty lists and returns it. + data = lambda n: (b'((lp' + str(n).encode() + b'\n' + + b'g' + str(n).encode() + b'\nt.') + # 0: ( MARK + # 1: ( MARK + # 2: l LIST (MARK at 1) + # 3: p PUT 1000000000000 + # 18: g GET 1000000000000 + # 33: t TUPLE (MARK at 0) + # 34: . STOP + for idx in [10**6, 10**9, 10**12]: + if idx > sys.maxsize: + continue + self.assertEqual(self.loads(data(idx)), ([],)*2) + + def test_too_large_long_binput(self): + # Test that LONG_BINPUT with large id does not cause allocation of + # too large memo table. The C implementation uses a dict-based memo + # for sparse indices (when idx > memo_len * 2) instead of allocating + # a massive array. This test verifies large sparse indices work without + # causing memory exhaustion. + # + # The following simple pickle creates an empty list, memoizes it + # using a large index, then loads it back on the stack, builds + # a tuple containing 2 identical empty lists and returns it. + data = lambda n: (b'(]r' + struct.pack(' sys.maxsize')) + + def test_truncated_large_binunicode8(self): + data = lambda size: b'\x8d' + struct.pack(' An OSError is raised if the IO object does not use a file descriptor. + # gh-141570: Check OSError is caught and handled + with unittest.mock.patch("os.isatty", side_effect=ZeroDivisionError): + file = unittest.mock.MagicMock() + file.fileno.side_effect = OSError + file.isatty.return_value = True + self.assertEqual(_colorize.can_colorize(file=file), True) + file.isatty.return_value = False + self.assertEqual(_colorize.can_colorize(file=file), False) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index fd5d43b09b9..8208d0e9c94 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -9,6 +9,7 @@ import pickle from string.templatelib import Template, Interpolation import typing +import sys import unittest from annotationlib import ( Format, @@ -73,6 +74,30 @@ def inner(arg: x): anno = get_annotations(inner, format=Format.FORWARDREF) self.assertEqual(anno["arg"], x) + def test_multiple_closure(self): + def inner(arg: x[y]): + pass + + fwdref = get_annotations(inner, format=Format.FORWARDREF)["arg"] + self.assertIsInstance(fwdref, ForwardRef) + self.assertEqual(fwdref.__forward_arg__, "x[y]") + with self.assertRaises(NameError): + fwdref.evaluate() + + y = str + fwdref = get_annotations(inner, format=Format.FORWARDREF)["arg"] + self.assertIsInstance(fwdref, ForwardRef) + extra_name, extra_val = next(iter(fwdref.__extra_names__.items())) + self.assertEqual(fwdref.__forward_arg__.replace(extra_name, extra_val.__name__), "x[str]") + with self.assertRaises(NameError): + fwdref.evaluate() + + x = list + self.assertEqual(fwdref.evaluate(), x[y]) + + fwdref = get_annotations(inner, format=Format.FORWARDREF)["arg"] + self.assertEqual(fwdref, x[y]) + def test_function(self): def f(x: int, y: doesntexist): pass @@ -94,6 +119,10 @@ def f( alpha: some | obj, beta: +some, gamma: some < obj, + delta: some | {obj: module}, + epsilon: some | {obj, module}, + zeta: some | [obj], + eta: some | (), ): pass @@ -122,6 +151,69 @@ def f( self.assertIsInstance(gamma_anno, ForwardRef) self.assertEqual(gamma_anno, support.EqualToForwardRef("some < obj", owner=f)) + delta_anno = anno["delta"] + self.assertIsInstance(delta_anno, ForwardRef) + self.assertEqual(delta_anno, support.EqualToForwardRef("some | {obj: module}", owner=f)) + + epsilon_anno = anno["epsilon"] + self.assertIsInstance(epsilon_anno, ForwardRef) + self.assertEqual(epsilon_anno, support.EqualToForwardRef("some | {obj, module}", owner=f)) + + zeta_anno = anno["zeta"] + self.assertIsInstance(zeta_anno, ForwardRef) + self.assertEqual(zeta_anno, support.EqualToForwardRef("some | [obj]", owner=f)) + + eta_anno = anno["eta"] + self.assertIsInstance(eta_anno, ForwardRef) + self.assertEqual(eta_anno, support.EqualToForwardRef("some | ()", owner=f)) + + def test_partially_nonexistent(self): + # These annotations start with a non-existent variable and then use + # global types with defined values. This partially evaluates by putting + # those globals into `fwdref.__extra_names__`. + def f( + x: obj | int, + y: container[int:obj, int], + z: dict_val | {str: int}, + alpha: set_val | {str, int}, + beta: obj | bool | int, + gamma: obj | call_func(int, kwd=bool), + ): + pass + + def func(*args, **kwargs): + return Union[*args, *(kwargs.values())] + + anno = get_annotations(f, format=Format.FORWARDREF) + globals_ = { + "obj": str, "container": list, "dict_val": {1: 2}, "set_val": {1, 2}, + "call_func": func + } + + x_anno = anno["x"] + self.assertIsInstance(x_anno, ForwardRef) + self.assertEqual(x_anno.evaluate(globals=globals_), str | int) + + y_anno = anno["y"] + self.assertIsInstance(y_anno, ForwardRef) + self.assertEqual(y_anno.evaluate(globals=globals_), list[int:str, int]) + + z_anno = anno["z"] + self.assertIsInstance(z_anno, ForwardRef) + self.assertEqual(z_anno.evaluate(globals=globals_), {1: 2} | {str: int}) + + alpha_anno = anno["alpha"] + self.assertIsInstance(alpha_anno, ForwardRef) + self.assertEqual(alpha_anno.evaluate(globals=globals_), {1, 2} | {str, int}) + + beta_anno = anno["beta"] + self.assertIsInstance(beta_anno, ForwardRef) + self.assertEqual(beta_anno.evaluate(globals=globals_), str | bool | int) + + gamma_anno = anno["gamma"] + self.assertIsInstance(gamma_anno, ForwardRef) + self.assertEqual(gamma_anno.evaluate(globals=globals_), str | func(int, kwd=bool)) + def test_partially_nonexistent_union(self): # Test unions with '|' syntax equal unions with typing.Union[] with some forwardrefs class UnionForwardrefs: @@ -755,6 +847,8 @@ def test_stringized_annotations_in_module(self): for kwargs in [ {"eval_str": True}, + {"eval_str": True, "globals": isa.__dict__, "locals": {}}, + {"eval_str": True, "globals": {}, "locals": isa.__dict__}, {"format": Format.VALUE, "eval_str": True}, ]: with self.subTest(**kwargs): @@ -788,7 +882,7 @@ def test_stringized_annotations_in_empty_module(self): self.assertEqual(get_annotations(isa2, eval_str=False), {}) def test_stringized_annotations_with_star_unpack(self): - def f(*args: *tuple[int, ...]): ... + def f(*args: "*tuple[int, ...]"): ... self.assertEqual(get_annotations(f, eval_str=True), {'args': (*tuple[int, ...],)[0]}) @@ -811,6 +905,44 @@ def test_stringized_annotations_on_wrapper(self): {"a": "int", "b": "str", "return": "MyClass"}, ) + def test_stringized_annotations_on_partial_wrapper(self): + isa = inspect_stringized_annotations + + def times_three_str(fn: typing.Callable[[str], isa.MyClass]): + @functools.wraps(fn) + def wrapper(b: "str") -> "MyClass": + return fn(b * 3) + + return wrapper + + wrapped = times_three_str(functools.partial(isa.function, 1)) + self.assertEqual(wrapped("x"), isa.MyClass(1, "xxx")) + self.assertIsNot(wrapped.__globals__, isa.function.__globals__) + self.assertEqual( + get_annotations(wrapped, eval_str=True), + {"b": str, "return": isa.MyClass}, + ) + self.assertEqual( + get_annotations(wrapped, eval_str=False), + {"b": "str", "return": "MyClass"}, + ) + + # If functools is not loaded, names will be evaluated in the current + # module instead of being unwrapped to the original. + functools_mod = sys.modules["functools"] + del sys.modules["functools"] + + self.assertEqual( + get_annotations(wrapped, eval_str=True), + {"b": str, "return": MyClass}, + ) + self.assertEqual( + get_annotations(wrapped, eval_str=False), + {"b": "str", "return": "MyClass"}, + ) + + sys.modules["functools"] = functools_mod + def test_stringized_annotations_on_class(self): isa = inspect_stringized_annotations # test that local namespace lookups work @@ -823,6 +955,16 @@ def test_stringized_annotations_on_class(self): {"x": int}, ) + def test_stringized_annotations_on_custom_object(self): + class HasAnnotations: + @property + def __annotations__(self): + return {"x": "int"} + + ha = HasAnnotations() + self.assertEqual(get_annotations(ha), {"x": "int"}) + self.assertEqual(get_annotations(ha, eval_str=True), {"x": int}) + def test_stringized_annotation_permutations(self): def define_class(name, has_future, has_annos, base_text, extra_names=None): lines = [] @@ -990,6 +1132,23 @@ def __annotate__(self): {"x": "int"}, ) + def test_non_dict_annotate(self): + class WeirdAnnotate: + def __annotate__(self, *args, **kwargs): + return "not a dict" + + wa = WeirdAnnotate() + for format in Format: + if format == Format.VALUE_WITH_FAKE_GLOBALS: + continue + with ( + self.subTest(format=format), + self.assertRaisesRegex( + ValueError, r".*__annotate__ returned a non-dict" + ), + ): + get_annotations(wa, format=format) + def test_no_annotations(self): class CustomClass: pass @@ -1247,6 +1406,32 @@ def evaluate(format, exc=NotImplementedError): "undefined", ) + def test_fake_global_evaluation(self): + # This will raise an AttributeError + def evaluate_union(format, exc=NotImplementedError): + if format == Format.VALUE_WITH_FAKE_GLOBALS: + # Return a ForwardRef + return builtins.undefined | list[int] + raise exc + + self.assertEqual( + annotationlib.call_evaluate_function(evaluate_union, Format.FORWARDREF), + support.EqualToForwardRef("builtins.undefined | list[int]"), + ) + + # This will raise an AttributeError + def evaluate_intermediate(format, exc=NotImplementedError): + if format == Format.VALUE_WITH_FAKE_GLOBALS: + intermediate = builtins.undefined + # Return a literal + return intermediate is None + raise exc + + self.assertIs( + annotationlib.call_evaluate_function(evaluate_intermediate, Format.FORWARDREF), + False, + ) + class TestCallAnnotateFunction(unittest.TestCase): # Tests for user defined annotate functions. @@ -1388,6 +1573,23 @@ def annotate(format, /): with self.assertRaises(NotImplementedError): annotationlib.call_annotate_function(annotate, Format.STRING) + def test_unsupported_formats(self): + def annotate(format, /): + if format == Format.FORWARDREF: + return {"x": str} + else: + raise NotImplementedError(format) + + with self.assertRaises(ValueError): + annotationlib.call_annotate_function(annotate, Format.VALUE_WITH_FAKE_GLOBALS) + + with self.assertRaises(RuntimeError): + annotationlib.call_annotate_function(annotate, Format.VALUE) + + with self.assertRaises(ValueError): + # Some non-Format value + annotationlib.call_annotate_function(annotate, 7) + def test_error_from_value_raised(self): # Test that the error from format.VALUE is raised # if all formats fail @@ -1688,6 +1890,14 @@ def test_forward_repr(self): repr(List[ForwardRef("int", module="mod")]), "typing.List[ForwardRef('int', module='mod')]", ) + self.assertEqual( + repr(List[ForwardRef("int", module="mod", is_class=True)]), + "typing.List[ForwardRef('int', module='mod', is_class=True)]", + ) + self.assertEqual( + repr(List[ForwardRef("int", owner="class")]), + "typing.List[ForwardRef('int', owner='class')]", + ) def test_forward_recursion_actually(self): def namespace1(): @@ -1793,6 +2003,19 @@ def test_evaluate_forwardref_format(self): support.EqualToForwardRef('"a" + 1'), ) + def test_evaluate_notimplemented_format(self): + class C: + x: alias + + fwdref = get_annotations(C, format=Format.FORWARDREF)["x"] + + with self.assertRaises(NotImplementedError): + fwdref.evaluate(format=Format.VALUE_WITH_FAKE_GLOBALS) + + with self.assertRaises(NotImplementedError): + # Some other unsupported value + fwdref.evaluate(format=7) + def test_evaluate_with_type_params(self): class Gen[T]: alias = int @@ -1926,6 +2149,56 @@ def test_fwdref_invalid_syntax(self): with self.assertRaises(SyntaxError): fr.evaluate() + def test_re_evaluate_generics(self): + global global_alias + + # If we've already run this test before, + # ensure the variable is still undefined + if "global_alias" in globals(): + del global_alias + + class C: + x: global_alias[int] + + # Evaluate the ForwardRef once + evaluated = get_annotations(C, format=Format.FORWARDREF)["x"].evaluate( + format=Format.FORWARDREF + ) + + # Now define the global and ensure that the ForwardRef evaluates + global_alias = list + self.assertEqual(evaluated.evaluate(), list[int]) + + def test_fwdref_evaluate_argument_mutation(self): + class C[T]: + nonlocal alias + x: alias[T] + + # Mutable arguments + globals_ = globals() + globals_copy = globals_.copy() + locals_ = locals() + locals_copy = locals_.copy() + + # Evaluate the ForwardRef, ensuring we use __cell__ and type params + get_annotations(C, format=Format.FORWARDREF)["x"].evaluate( + globals=globals_, + locals=locals_, + type_params=C.__type_params__, + format=Format.FORWARDREF, + ) + + # Check if the passed in mutable arguments equal the originals + self.assertEqual(globals_, globals_copy) + self.assertEqual(locals_, locals_copy) + + alias = list + + def test_fwdref_final_class(self): + with self.assertRaises(TypeError): + class C(ForwardRef): + pass + class TestAnnotationLib(unittest.TestCase): def test__all__(self): diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index d6c9c1ef2c8..dff7ba750fa 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -581,13 +581,22 @@ class TestOptionalsShortLong(ParserTestCase): class TestOptionalsDest(ParserTestCase): """Tests various means of setting destination""" - argument_signatures = [Sig('--foo-bar'), Sig('--baz', dest='zabbaz')] + argument_signatures = [ + Sig('-x', '-foobar', '--foo-bar', '-barfoo', '-X'), + Sig('--baz', dest='zabbaz'), + Sig('-y', '-qux', '-Y'), + Sig('-z'), + ] failures = ['a'] successes = [ - ('--foo-bar f', NS(foo_bar='f', zabbaz=None)), - ('--baz g', NS(foo_bar=None, zabbaz='g')), - ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i')), - ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j')), + ('--foo-bar f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('-x f', NS(foo_bar='f', zabbaz=None, qux=None, z=None)), + ('--baz g', NS(foo_bar=None, zabbaz='g', qux=None, z=None)), + ('--foo-bar h --baz i', NS(foo_bar='h', zabbaz='i', qux=None, z=None)), + ('--baz j --foo-bar k', NS(foo_bar='k', zabbaz='j', qux=None, z=None)), + ('-qux l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-y l', NS(foo_bar=None, zabbaz=None, qux='l', z=None)), + ('-z m', NS(foo_bar=None, zabbaz=None, qux=None, z='m')), ] @@ -796,6 +805,76 @@ def test_invalid_name(self): self.assertEqual(str(cm.exception), "invalid option name '--no-foo' for BooleanOptionalAction") +class TestBooleanOptionalActionSingleDash(ParserTestCase): + """Tests BooleanOptionalAction with single dash""" + + argument_signatures = [ + Sig('-foo', '-x', action=argparse.BooleanOptionalAction), + ] + failures = ['--foo', '--no-foo', '-no-foo', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('-foo', NS(foo=True)), + ('-nofoo', NS(foo=False)), + ('-x', NS(foo=True)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser() + with self.assertRaises(ValueError) as cm: + parser.add_argument('-nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '-nofoo' for BooleanOptionalAction") + +class TestBooleanOptionalActionAlternatePrefixChars(ParserTestCase): + """Tests BooleanOptionalAction with custom prefixes""" + + parser_signature = Sig(prefix_chars='+-', add_help=False) + argument_signatures = [Sig('++foo', action=argparse.BooleanOptionalAction)] + failures = ['--foo', '--no-foo'] + successes = [ + ('', NS(foo=None)), + ('++foo', NS(foo=True)), + ('++no-foo', NS(foo=False)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('--foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('++no-foo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '++no-foo' for BooleanOptionalAction") + +class TestBooleanOptionalActionSingleAlternatePrefixChar(ParserTestCase): + """Tests BooleanOptionalAction with single alternate prefix char""" + + parser_signature = Sig(prefix_chars='+/', add_help=False) + argument_signatures = [ + Sig('+foo', '+x', action=argparse.BooleanOptionalAction), + ] + failures = ['++foo', '++no-foo', '++nofoo', + '-no-foo', '-nofoo', '+no-foo', '-nofoo', + '+no-x', '+nox', '-no-x', '-nox'] + successes = [ + ('', NS(foo=None)), + ('+foo', NS(foo=True)), + ('+nofoo', NS(foo=False)), + ('+x', NS(foo=True)), + ] + + def test_invalid_name(self): + parser = argparse.ArgumentParser(prefix_chars='+/') + with self.assertRaisesRegex(ValueError, + 'BooleanOptionalAction.*is not valid for positional arguments'): + parser.add_argument('-foo', action=argparse.BooleanOptionalAction) + with self.assertRaises(ValueError) as cm: + parser.add_argument('+nofoo', action=argparse.BooleanOptionalAction) + self.assertEqual(str(cm.exception), + "invalid option name '+nofoo' for BooleanOptionalAction") + class TestBooleanOptionalActionRequired(ParserTestCase): """Tests BooleanOptionalAction required""" @@ -2283,6 +2362,7 @@ class TestNegativeNumber(ParserTestCase): ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] +@force_not_colorized_test_class class TestArgumentAndSubparserSuggestions(TestCase): """Test error handling and suggestion when a user makes a typo""" @@ -4886,6 +4966,25 @@ def test_long_mutex_groups_wrap(self): ''') self.assertEqual(parser.format_usage(), usage) + def test_mutex_groups_with_mixed_optionals_positionals_wrap(self): + # https://github.com/python/cpython/issues/75949 + # Mutually exclusive groups containing both optionals and positionals + # should preserve pipe separators when the usage line wraps. + parser = argparse.ArgumentParser(prog='PROG') + g = parser.add_mutually_exclusive_group() + g.add_argument('-v', '--verbose', action='store_true') + g.add_argument('-q', '--quiet', action='store_true') + g.add_argument('-x', '--extra-long-option-name', nargs='?') + g.add_argument('-y', '--yet-another-long-option', nargs='?') + g.add_argument('positional', nargs='?') + + usage = textwrap.dedent('''\ + usage: PROG [-h] [-v | -q | -x [EXTRA_LONG_OPTION_NAME] | + -y [YET_ANOTHER_LONG_OPTION] | + positional] + ''') + self.assertEqual(parser.format_usage(), usage) + class TestHelpVariableExpansion(HelpTestCase): """Test that variables are expanded properly in help messages""" @@ -5610,6 +5709,8 @@ def test_invalid_option_strings(self): self.assertTypeError('-', errmsg='dest= is required') self.assertTypeError('--', errmsg='dest= is required') self.assertTypeError('---', errmsg='dest= is required') + self.assertTypeError('-', '--', '---', + errmsg="dest= is required for options like '-', '--', '---'") def test_invalid_prefix(self): self.assertValueError('--foo', '+foo', @@ -6147,6 +6248,7 @@ def spam(string_to_convert): # Check that deprecated arguments output warning # ============================================== +@force_not_colorized_test_class class TestDeprecatedArguments(TestCase): def test_deprecated_option(self): @@ -7205,6 +7307,8 @@ def test_argparse_color(self): short_b = self.theme.short_option label_b = self.theme.label pos_b = self.theme.action + default = self.theme.default + default_value = self.theme.default_value reset = self.theme.reset # Act @@ -7231,17 +7335,17 @@ def test_argparse_color(self): {heading}options:{reset} {short_b}-h{reset}, {long_b}--help{reset} show this help message and exit - {short_b}-v{reset}, {long_b}--verbose{reset} more spam (default: False) - {short_b}-q{reset}, {long_b}--quiet{reset} less spam (default: False) + {short_b}-v{reset}, {long_b}--verbose{reset} more spam {default}(default: {default_value}False{default}){reset} + {short_b}-q{reset}, {long_b}--quiet{reset} less spam {default}(default: {default_value}False{default}){reset} {short_b}-o{reset}, {long_b}--optional1{reset} {long_b}--optional2{reset} {label_b}OPTIONAL2{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {long_b}--optional3{reset} {label_b}{{X,Y,Z}}{reset} - {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) - {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one (default: None) + {long_b}--optional4{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional5{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} + {long_b}--optional6{reset} {label_b}{{X,Y,Z}}{reset} pick one {default}(default: {default_value}None{default}){reset} {short_b}-p{reset}, {long_b}--optional7{reset} {label_b}{{Aaaaa,Bbbbb,Ccccc,Ddddd}}{reset} - pick one (default: None) + pick one {default}(default: {default_value}None{default}){reset} {short_b}+f{reset} {label_b}F{reset} {long_b}++bar{reset} {label_b}BAR{reset} {long_b}-+baz{reset} {label_b}BAZ{reset} @@ -7370,6 +7474,45 @@ def test_subparser_prog_is_stored_without_color(self): help_text = demo_parser.format_help() self.assertNotIn('\x1b[', help_text) + def test_error_and_warning_keywords_colorized(self): + parser = argparse.ArgumentParser(prog='PROG') + parser.add_argument('foo') + + with self.assertRaises(SystemExit): + with captured_stderr() as stderr: + parser.parse_args([]) + + err = stderr.getvalue() + error_color = self.theme.error + reset = self.theme.reset + self.assertIn(f'{error_color}error:{reset}', err) + + with captured_stderr() as stderr: + parser._warning('test warning') + + warn = stderr.getvalue() + warning_color = self.theme.warning + self.assertIn(f'{warning_color}warning:{reset}', warn) + + def test_error_and_warning_not_colorized_when_disabled(self): + parser = argparse.ArgumentParser(prog='PROG', color=False) + parser.add_argument('foo') + + with self.assertRaises(SystemExit): + with captured_stderr() as stderr: + parser.parse_args([]) + + err = stderr.getvalue() + self.assertNotIn('\x1b[', err) + self.assertIn('error:', err) + + with captured_stderr() as stderr: + parser._warning('test warning') + + warn = stderr.getvalue() + self.assertNotIn('\x1b[', warn) + self.assertIn('warning:', warn) + class TestModule(unittest.TestCase): def test_deprecated__version__(self): diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 1ad381f883b..12cae3c20fc 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -1083,6 +1083,16 @@ def test_filter_syntax_warnings_by_module(self): self.assertEqual(wm.filename, '') self.assertIs(wm.category, SyntaxWarning) + with warnings.catch_warnings(record=True) as wlog: + warnings.simplefilter('error') + warnings.filterwarnings('always', module=r'package\.module\z') + warnings.filterwarnings('error', module=r'') + ast.parse(source, filename, module='package.module') + self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10]) + for wm in wlog: + self.assertEqual(wm.filename, filename) + self.assertIs(wm.category, SyntaxWarning) + class CopyTests(unittest.TestCase): """Test copying and pickling AST nodes.""" @@ -3047,8 +3057,8 @@ def test_source_segment_missing_info(self): class NodeTransformerTests(ASTTestMixin, unittest.TestCase): def assertASTTransformation(self, transformer_class, - initial_code, expected_code): - initial_ast = ast.parse(dedent(initial_code)) + code, expected_code): + initial_ast = ast.parse(dedent(code)) expected_ast = ast.parse(dedent(expected_code)) transformer = transformer_class() diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py index 3a17c169c34..bf301740741 100644 --- a/Lib/test/test_asyncio/test_subprocess.py +++ b/Lib/test/test_asyncio/test_subprocess.py @@ -11,7 +11,7 @@ from asyncio import subprocess from test.test_asyncio import utils as test_utils from test import support -from test.support import os_helper +from test.support import os_helper, warnings_helper, gc_collect if not support.has_subprocess_support: raise unittest.SkipTest("test module requires subprocess") @@ -879,6 +879,44 @@ async def main(): self.loop.run_until_complete(main()) + @warnings_helper.ignore_warnings(category=ResourceWarning) + def test_subprocess_read_pipe_cancelled(self): + async def main(): + loop = asyncio.get_running_loop() + loop.connect_read_pipe = mock.AsyncMock(side_effect=asyncio.CancelledError) + with self.assertRaises(asyncio.CancelledError): + await asyncio.create_subprocess_exec(*PROGRAM_BLOCKED, stderr=asyncio.subprocess.PIPE) + + asyncio.run(main()) + gc_collect() + + @warnings_helper.ignore_warnings(category=ResourceWarning) + def test_subprocess_write_pipe_cancelled(self): + async def main(): + loop = asyncio.get_running_loop() + loop.connect_write_pipe = mock.AsyncMock(side_effect=asyncio.CancelledError) + with self.assertRaises(asyncio.CancelledError): + await asyncio.create_subprocess_exec(*PROGRAM_BLOCKED, stdin=asyncio.subprocess.PIPE) + + asyncio.run(main()) + gc_collect() + + @warnings_helper.ignore_warnings(category=ResourceWarning) + def test_subprocess_read_write_pipe_cancelled(self): + async def main(): + loop = asyncio.get_running_loop() + loop.connect_read_pipe = mock.AsyncMock(side_effect=asyncio.CancelledError) + loop.connect_write_pipe = mock.AsyncMock(side_effect=asyncio.CancelledError) + with self.assertRaises(asyncio.CancelledError): + await asyncio.create_subprocess_exec( + *PROGRAM_BLOCKED, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + asyncio.run(main()) + gc_collect() if sys.platform != 'win32': # Unix diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py index 6b5c65a56d8..ac3f0940545 100644 --- a/Lib/test/test_base64.py +++ b/Lib/test/test_base64.py @@ -231,18 +231,6 @@ def test_b64decode(self): self.check_other_types(base64.b64decode, b"YWJj", b"abc") self.check_decode_type_errors(base64.b64decode) - # Test with arbitrary alternative characters - tests_altchars = {(b'01a*b$cd', b'*$'): b'\xd3V\xbeo\xf7\x1d', - } - for (data, altchars), res in tests_altchars.items(): - data_str = data.decode('ascii') - altchars_str = altchars.decode('ascii') - - eq(base64.b64decode(data, altchars=altchars), res) - eq(base64.b64decode(data_str, altchars=altchars), res) - eq(base64.b64decode(data, altchars=altchars_str), res) - eq(base64.b64decode(data_str, altchars=altchars_str), res) - # Test standard alphabet for data, res in tests.items(): eq(base64.standard_b64decode(data), res) @@ -263,6 +251,20 @@ def test_b64decode(self): b'\xd3V\xbeo\xf7\x1d') self.check_decode_type_errors(base64.urlsafe_b64decode) + def test_b64decode_altchars(self): + # Test with arbitrary alternative characters + eq = self.assertEqual + res = b'\xd3V\xbeo\xf7\x1d' + for altchars in b'*$', b'+/', b'/+', b'+_', b'-+', b'-/', b'/_': + data = b'01a%cb%ccd' % tuple(altchars) + data_str = data.decode('ascii') + altchars_str = altchars.decode('ascii') + + eq(base64.b64decode(data, altchars=altchars), res) + eq(base64.b64decode(data_str, altchars=altchars), res) + eq(base64.b64decode(data, altchars=altchars_str), res) + eq(base64.b64decode(data_str, altchars=altchars_str), res) + def test_b64decode_padding_error(self): self.assertRaises(binascii.Error, base64.b64decode, b'abc') self.assertRaises(binascii.Error, base64.b64decode, 'abc') @@ -295,10 +297,12 @@ def test_b64decode_invalid_chars(self): base64.b64decode(bstr.decode('ascii'), validate=True) # Normal alphabet characters not discarded when alternative given - res = b'\xFB\xEF\xBE\xFF\xFF\xFF' - self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res) - self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res) - + res = b'\xfb\xef\xff' + self.assertEqual(base64.b64decode(b'++//', validate=True), res) + self.assertEqual(base64.b64decode(b'++//', '-_', validate=True), res) + self.assertEqual(base64.b64decode(b'--__', '-_', validate=True), res) + self.assertEqual(base64.urlsafe_b64decode(b'++//'), res) + self.assertEqual(base64.urlsafe_b64decode(b'--__'), res) def _altchars_strategy(): """Generate 'altchars' for base64 encoding.""" @@ -390,23 +394,33 @@ def test_b32decode_casefold(self): self.assertRaises(binascii.Error, base64.b32decode, b'me======') self.assertRaises(binascii.Error, base64.b32decode, 'me======') + def test_b32decode_map01(self): # Mapping zero and one - eq(base64.b32decode(b'MLO23456'), b'b\xdd\xad\xf3\xbe') - eq(base64.b32decode('MLO23456'), b'b\xdd\xad\xf3\xbe') + eq = self.assertEqual + res_L = b'b\xdd\xad\xf3\xbe' + res_I = b'b\x1d\xad\xf3\xbe' + eq(base64.b32decode(b'MLO23456'), res_L) + eq(base64.b32decode('MLO23456'), res_L) + eq(base64.b32decode(b'MIO23456'), res_I) + eq(base64.b32decode('MIO23456'), res_I) + self.assertRaises(binascii.Error, base64.b32decode, b'M1023456') + self.assertRaises(binascii.Error, base64.b32decode, b'M1O23456') + self.assertRaises(binascii.Error, base64.b32decode, b'ML023456') + self.assertRaises(binascii.Error, base64.b32decode, b'MI023456') - map_tests = {(b'M1023456', b'L'): b'b\xdd\xad\xf3\xbe', - (b'M1023456', b'I'): b'b\x1d\xad\xf3\xbe', - } - for (data, map01), res in map_tests.items(): - data_str = data.decode('ascii') + data = b'M1023456' + data_str = data.decode('ascii') + for map01, res in [(b'L', res_L), (b'I', res_I)]: map01_str = map01.decode('ascii') eq(base64.b32decode(data, map01=map01), res) eq(base64.b32decode(data_str, map01=map01), res) eq(base64.b32decode(data, map01=map01_str), res) eq(base64.b32decode(data_str, map01=map01_str), res) - self.assertRaises(binascii.Error, base64.b32decode, data) - self.assertRaises(binascii.Error, base64.b32decode, data_str) + + eq(base64.b32decode(b'M1O23456', map01=map01), res) + eq(base64.b32decode(b'M%c023456' % map01, map01=map01), res) + eq(base64.b32decode(b'M%cO23456' % map01, map01=map01), res) def test_b32decode_error(self): tests = [b'abc', b'ABCDEF==', b'==ABCDEF'] diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index fba46af6617..ce60a5d095d 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1103,7 +1103,8 @@ def test_exec_filter_syntax_warnings_by_module(self): with warnings.catch_warnings(record=True) as wlog: warnings.simplefilter('error') - warnings.filterwarnings('always', module=r'\z') + warnings.filterwarnings('always', module=r'package.module\z') + warnings.filterwarnings('error', module=r'') exec(source, {'__name__': 'package.module', '__file__': filename}) self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21]) for wm in wlog: diff --git a/Lib/test/test_bytes.py b/Lib/test/test_bytes.py index f10e4041937..a55ec6cf3b8 100644 --- a/Lib/test/test_bytes.py +++ b/Lib/test/test_bytes.py @@ -549,6 +549,17 @@ def test_hex_separator_basics(self): self.assertEqual(three_bytes.hex(':', 2), 'b9:01ef') self.assertEqual(three_bytes.hex(':', 1), 'b9:01:ef') self.assertEqual(three_bytes.hex('*', -2), 'b901*ef') + self.assertEqual(three_bytes.hex(sep=':', bytes_per_sep=2), 'b9:01ef') + self.assertEqual(three_bytes.hex(sep='*', bytes_per_sep=-2), 'b901*ef') + for bytes_per_sep in 3, -3, 2**31-1, -(2**31-1): + with self.subTest(bytes_per_sep=bytes_per_sep): + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + for bytes_per_sep in 2**31, -2**31, 2**1000, -2**1000: + with self.subTest(bytes_per_sep=bytes_per_sep): + try: + self.assertEqual(three_bytes.hex(':', bytes_per_sep), 'b901ef') + except OverflowError: + pass value = b'{s\005\000\000\000worldi\002\000\000\000s\005\000\000\000helloi\001\000\000\0000' self.assertEqual(value.hex('.', 8), '7b7305000000776f.726c646902000000.730500000068656c.6c6f690100000030') @@ -802,6 +813,13 @@ def __int__(self): with self.assertRaisesRegex(TypeError, msg): operator.mod(format_bytes, value) + def test_memory_leak_gh_140939(self): + # gh-140939: MemoryError is raised without leaking + _testcapi = import_helper.import_module('_testcapi') + with self.assertRaises(MemoryError): + b = self.type2test(b'%*b') + b % (_testcapi.PY_SSIZE_T_MAX, b'abc') + def test_imod(self): b = self.type2test(b'hello, %b!') orig = b @@ -1390,6 +1408,16 @@ def test_clear(self): b.append(ord('p')) self.assertEqual(b, b'p') + # Cleared object should be empty. + b = bytearray(b'abc') + b.clear() + self.assertEqual(b.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(b), base_size) + c = b.copy() + self.assertEqual(c.__alloc__(), 0) + self.assertEqual(sys.getsizeof(c), base_size) + def test_copy(self): b = bytearray(b'abc') bb = b.copy() @@ -1451,6 +1479,87 @@ def test_resize(self): self.assertRaises(MemoryError, bytearray().resize, sys.maxsize) self.assertRaises(MemoryError, bytearray(1000).resize, sys.maxsize) + def test_take_bytes(self): + ba = bytearray(b'ab') + self.assertEqual(ba.take_bytes(), b'ab') + self.assertEqual(len(ba), 0) + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(ba.__alloc__(), 0) + base_size = sys.getsizeof(bytearray()) + self.assertEqual(sys.getsizeof(ba), base_size) + + # Positive and negative slicing. + ba = bytearray(b'abcdef') + self.assertEqual(ba.take_bytes(1), b'a') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-5), b'') + self.assertEqual(ba, bytearray(b'bcdef')) + self.assertEqual(len(ba), 5) + self.assertEqual(ba.take_bytes(-3), b'bc') + self.assertEqual(ba, bytearray(b'def')) + self.assertEqual(len(ba), 3) + self.assertEqual(ba.take_bytes(3), b'def') + self.assertEqual(ba, bytearray(b'')) + self.assertEqual(len(ba), 0) + + # Take nothing from emptiness. + self.assertEqual(ba.take_bytes(0), b'') + self.assertEqual(ba.take_bytes(), b'') + self.assertEqual(ba.take_bytes(None), b'') + + # Out of bounds, bad take value. + self.assertRaises(IndexError, ba.take_bytes, -1) + self.assertRaises(TypeError, ba.take_bytes, 3.14) + ba = bytearray(b'abcdef') + self.assertRaises(IndexError, ba.take_bytes, 7) + + # Offset between physical and logical start (ob_bytes != ob_start). + ba = bytearray(b'abcde') + del ba[:2] + self.assertEqual(ba, bytearray(b'cde')) + self.assertEqual(ba.take_bytes(), b'cde') + + # Overallocation at end. + ba = bytearray(b'abcde') + del ba[-2:] + self.assertEqual(ba, bytearray(b'abc')) + self.assertEqual(ba.take_bytes(), b'abc') + ba = bytearray(b'abcde') + ba.resize(4) + self.assertEqual(ba.take_bytes(), b'abcd') + + # Take of a bytearray with references should fail. + ba = bytearray(b'abc') + with memoryview(ba) as mv: + self.assertRaises(BufferError, ba.take_bytes) + self.assertEqual(ba.take_bytes(), b'abc') + + @support.cpython_only # tests an implementation detail + def test_take_bytes_optimization(self): + # Validate optimization around taking lots of little chunks out of a + # much bigger buffer. Save work by only copying a little rather than + # moving a lot. + ba = bytearray(b'abcdef' + b'0' * 1000) + start_alloc = ba.__alloc__() + + # Take two bytes at a time, checking alloc doesn't change. + self.assertEqual(ba.take_bytes(2), b'ab') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 4 + 1000) + self.assertEqual(ba.take_bytes(2), b'cd') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 2 + 1000) + self.assertEqual(ba.take_bytes(2), b'ef') + self.assertEqual(ba.__alloc__(), start_alloc) + self.assertEqual(len(ba), 0 + 1000) + self.assertEqual(ba.__alloc__(), start_alloc) + + # Take over half, alloc shrinks to exact size. + self.assertEqual(ba.take_bytes(501), b'0' * 501) + self.assertEqual(len(ba), 499) + bytes_header_size = sys.getsizeof(b'') + self.assertEqual(ba.__alloc__(), 499 + bytes_header_size) def test_setitem(self): def setitem_as_mapping(b, i, val): @@ -2557,6 +2666,22 @@ def zfill(b, a): c = a.zfill(0x400000) assert not c or c[-1] not in (0xdd, 0xcd) + def resize(b, a): # MODIFIES! + b.wait() + a.resize(10) + + def take_bytes(b, a): # MODIFIES! + b.wait() + c = a.take_bytes() + assert not c or c[0] == 48 # '0' + + def take_bytes_n(b, a): # MODIFIES! + b.wait() + try: + c = a.take_bytes(10) + assert c == b'0123456789' + except IndexError: pass + def check(funcs, a=None, *args): if a is None: a = bytearray(b'0' * 0x400000) @@ -2618,6 +2743,12 @@ def check(funcs, a=None, *args): check([clear] + [startswith] * 10) check([clear] + [strip] * 10) + check([clear] + [resize] * 10) + + check([clear] + [take_bytes] * 10) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 0x400)) + check([take_bytes_n] * 10, bytearray(b'0123456789' * 5)) + check([clear] + [contains] * 10) check([clear] + [subscript] * 10) check([clear2] + [ass_subscript2] * 10, None, bytearray(b'0' * 0x400000)) diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 31e58e825be..f42526aee19 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -1048,9 +1048,14 @@ def get_sp(): this_sp = _testinternalcapi.get_stack_pointer() lower_sp = _testcapi.pyobject_vectorcall(get_sp, (), ()) - self.assertLess(lower_sp, this_sp) + if _testcapi._Py_STACK_GROWS_DOWN: + self.assertLess(lower_sp, this_sp) + safe_margin = this_sp - lower_sp + else: + self.assertGreater(lower_sp, this_sp) + safe_margin = lower_sp - this_sp # Add an (arbitrary) extra 25% for safety - safe_margin = (this_sp - lower_sp) * 5 / 4 + safe_margin = safe_margin * 5 / 4 self.assertLess(safe_margin, _testinternalcapi.get_stack_margin()) @skip_on_s390x diff --git a/Lib/test/test_capi/test_bytearray.py b/Lib/test/test_capi/test_bytearray.py index 52565ea34c6..cb7ad8b2225 100644 --- a/Lib/test/test_capi/test_bytearray.py +++ b/Lib/test/test_capi/test_bytearray.py @@ -1,3 +1,4 @@ +import sys import unittest from test.support import import_helper @@ -55,7 +56,9 @@ def test_fromstringandsize(self): self.assertEqual(fromstringandsize(b'', 0), bytearray()) self.assertEqual(fromstringandsize(NULL, 0), bytearray()) self.assertEqual(len(fromstringandsize(NULL, 3)), 3) - self.assertRaises(MemoryError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, PY_SSIZE_T_MAX) + self.assertRaises(OverflowError, fromstringandsize, NULL, + PY_SSIZE_T_MAX-sys.getsizeof(b'') + 1) self.assertRaises(SystemError, fromstringandsize, b'abc', -1) self.assertRaises(SystemError, fromstringandsize, b'abc', PY_SSIZE_T_MIN) diff --git a/Lib/test/test_capi/test_float.py b/Lib/test/test_capi/test_float.py index 983b991b4f1..df7017e6436 100644 --- a/Lib/test/test_capi/test_float.py +++ b/Lib/test/test_capi/test_float.py @@ -29,6 +29,23 @@ NAN = float("nan") +def make_nan(size, sign, quiet, payload=None): + if size == 8: + payload_mask = 0x7ffffffffffff + i = (sign << 63) + (0x7ff << 52) + (quiet << 51) + elif size == 4: + payload_mask = 0x3fffff + i = (sign << 31) + (0xff << 23) + (quiet << 22) + elif size == 2: + payload_mask = 0x1ff + i = (sign << 15) + (0x1f << 10) + (quiet << 9) + else: + raise ValueError("size must be either 2, 4, or 8") + if payload is None: + payload = random.randint(not quiet, payload_mask) + return i + payload + + class CAPIFloatTest(unittest.TestCase): def test_check(self): # Test PyFloat_Check() @@ -202,16 +219,7 @@ def test_pack_unpack_roundtrip_for_nans(self): # HP PA RISC uses 0 for quiet, see: # https://en.wikipedia.org/wiki/NaN#Encoding signaling = 1 - quiet = int(not signaling) - if size == 8: - payload = random.randint(signaling, 0x7ffffffffffff) - i = (sign << 63) + (0x7ff << 52) + (quiet << 51) + payload - elif size == 4: - payload = random.randint(signaling, 0x3fffff) - i = (sign << 31) + (0xff << 23) + (quiet << 22) + payload - elif size == 2: - payload = random.randint(signaling, 0x1ff) - i = (sign << 15) + (0x1f << 10) + (quiet << 9) + payload + i = make_nan(size, sign, not signaling) data = bytes.fromhex(f'{i:x}') for endian in (BIG_ENDIAN, LITTLE_ENDIAN): with self.subTest(data=data, size=size, endian=endian): @@ -221,6 +229,32 @@ def test_pack_unpack_roundtrip_for_nans(self): self.assertTrue(math.isnan(value)) self.assertEqual(data1, data2) + @unittest.skipUnless(HAVE_IEEE_754, "requires IEEE 754") + @unittest.skipUnless(sys.maxsize != 2147483647, "requires 64-bit mode") + def test_pack_unpack_nans_for_different_formats(self): + pack = _testcapi.float_pack + unpack = _testcapi.float_unpack + + for endian in (BIG_ENDIAN, LITTLE_ENDIAN): + with self.subTest(endian=endian): + byteorder = "big" if endian == BIG_ENDIAN else "little" + + # Convert sNaN to qNaN, if payload got truncated + data = make_nan(8, 0, False, 0x80001).to_bytes(8, byteorder) + snan_low = unpack(data, endian) + qnan4 = make_nan(4, 0, True, 0).to_bytes(4, byteorder) + qnan2 = make_nan(2, 0, True, 0).to_bytes(2, byteorder) + self.assertEqual(pack(4, snan_low, endian), qnan4) + self.assertEqual(pack(2, snan_low, endian), qnan2) + + # Preserve NaN type, if payload not truncated + data = make_nan(8, 0, False, 0x80000000001).to_bytes(8, byteorder) + snan_high = unpack(data, endian) + snan4 = make_nan(4, 0, False, 16384).to_bytes(4, byteorder) + snan2 = make_nan(2, 0, False, 2).to_bytes(2, byteorder) + self.assertEqual(pack(4, snan_high, endian), snan4) + self.assertEqual(pack(2, snan_high, endian), snan2) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_capi/test_module.py b/Lib/test/test_capi/test_module.py new file mode 100644 index 00000000000..823e2ab6b2e --- /dev/null +++ b/Lib/test/test_capi/test_module.py @@ -0,0 +1,186 @@ +# The C functions used by this module are in: +# Modules/_testcapi/module.c + +import unittest +import types +from test.support import import_helper, subTests, requires_gil_enabled + +# Skip this test if the _testcapi module isn't available. +_testcapi = import_helper.import_module('_testcapi') + + +class FakeSpec: + name = 'testmod' + +DEF_SLOTS = ( + 'Py_mod_name', 'Py_mod_doc', 'Py_mod_state_size', 'Py_mod_methods', + 'Py_mod_state_traverse', 'Py_mod_state_clear', 'Py_mod_state_free', + 'Py_mod_token', +) + +def def_and_token(mod): + return ( + _testcapi.pymodule_get_def(mod), + _testcapi.pymodule_get_token(mod), + ) + +class TestModFromSlotsAndSpec(unittest.TestCase): + @requires_gil_enabled("empty slots re-enable GIL") + def test_empty(self): + mod = _testcapi.module_from_slots_empty(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + size = _testcapi.pymodule_get_state_size(mod) + self.assertEqual(size, 0) + + def test_null_slots(self): + with self.assertRaises(SystemError): + _testcapi.module_from_slots_null(FakeSpec()) + + def test_none_spec(self): + # The spec currently must contain a name + with self.assertRaises(AttributeError): + _testcapi.module_from_slots_empty(None) + with self.assertRaises(AttributeError): + _testcapi.module_from_slots_name(None) + + def test_name(self): + # Py_mod_name (and PyModuleDef.m_name) are currently ignored when + # spec is given. + # We still test that it's accepted. + mod = _testcapi.module_from_slots_name(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + + def test_doc(self): + mod = _testcapi.module_from_slots_doc(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, 'the docstring') + + def test_size(self): + mod = _testcapi.module_from_slots_size(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + size = _testcapi.pymodule_get_state_size(mod) + self.assertEqual(size, 123) + + def test_methods(self): + mod = _testcapi.module_from_slots_methods(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + self.assertEqual(mod.a_method(456), (mod, 456)) + + def test_gc(self): + mod = _testcapi.module_from_slots_gc(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + + # Check that the requested hook functions (which module_from_slots_gc + # stores as attributes) match what's in the module (as retrieved by + # _testinternalcapi.module_get_gc_hooks) + _testinternalcapi = import_helper.import_module('_testinternalcapi') + traverse, clear, free = _testinternalcapi.module_get_gc_hooks(mod) + self.assertEqual(traverse, mod.traverse) + self.assertEqual(clear, mod.clear) + self.assertEqual(free, mod.free) + + def test_token(self): + mod = _testcapi.module_from_slots_token(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, _testcapi.module_test_token)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + + def test_exec(self): + mod = _testcapi.module_from_slots_exec(FakeSpec()) + self.assertIsInstance(mod, types.ModuleType) + self.assertEqual(def_and_token(mod), (0, 0)) + self.assertEqual(mod.__name__, 'testmod') + self.assertEqual(mod.__doc__, None) + self.assertEqual(mod.a_number, 456) + + def test_create(self): + spec = FakeSpec() + spec._gimme_this = "not a module object" + mod = _testcapi.module_from_slots_create(spec) + self.assertIsInstance(mod, str) + self.assertEqual(mod, "not a module object") + with self.assertRaises(TypeError): + _testcapi.pymodule_get_def(mod), + with self.assertRaises(TypeError): + _testcapi.pymodule_get_token(mod) + + def test_def_slot(self): + """Slots that replace PyModuleDef fields can't be used with PyModuleDef + """ + for name in DEF_SLOTS: + with self.subTest(name): + spec = FakeSpec() + spec._test_slot_id = getattr(_testcapi, name) + with self.assertRaises(SystemError) as cm: + _testcapi.module_from_def_slot(spec) + self.assertIn(name, str(cm.exception)) + self.assertIn("PyModuleDef", str(cm.exception)) + + def test_repeated_def_slot(self): + """Slots that replace PyModuleDef fields can't be repeated""" + for name in (*DEF_SLOTS, 'Py_mod_exec'): + with self.subTest(name): + spec = FakeSpec() + spec._test_slot_id = getattr(_testcapi, name) + with self.assertRaises(SystemError) as cm: + _testcapi.module_from_slots_repeat_slot(spec) + self.assertIn(name, str(cm.exception)) + self.assertIn("more than one", str(cm.exception)) + + def test_null_def_slot(self): + """Slots that replace PyModuleDef fields can't be NULL""" + for name in (*DEF_SLOTS, 'Py_mod_exec'): + with self.subTest(name): + spec = FakeSpec() + spec._test_slot_id = getattr(_testcapi, name) + with self.assertRaises(SystemError) as cm: + _testcapi.module_from_slots_null_slot(spec) + self.assertIn(name, str(cm.exception)) + self.assertIn("NULL", str(cm.exception)) + + def test_def_multiple_exec(self): + """PyModule_Exec runs all exec slots of PyModuleDef-defined module""" + mod = _testcapi.module_from_def_multiple_exec(FakeSpec()) + self.assertFalse(hasattr(mod, 'a_number')) + _testcapi.pymodule_exec(mod) + self.assertEqual(mod.a_number, 456) + self.assertEqual(mod.another_number, 789) + _testcapi.pymodule_exec(mod) + self.assertEqual(mod.a_number, 456) + self.assertEqual(mod.another_number, -789) + def_ptr, token = def_and_token(mod) + self.assertEqual(def_ptr, token) + + def test_def_token(self): + """In PyModuleDef-defined modules, the def is the token""" + mod = _testcapi.module_from_def_multiple_exec(FakeSpec()) + def_ptr, token = def_and_token(mod) + self.assertEqual(def_ptr, token) + self.assertGreater(def_ptr, 0) + + @subTests('name, expected_size', [ + (__name__, 0), # Python module + ('_testsinglephase', -1), # single-phase init + ('sys', -1), + ]) + def test_get_state_size(self, name, expected_size): + mod = import_helper.import_module(name) + size = _testcapi.pymodule_get_state_size(mod) + self.assertEqual(size, expected_size) diff --git a/Lib/test/test_capi/test_object.py b/Lib/test/test_capi/test_object.py index d4056727d07..c5040913e9e 100644 --- a/Lib/test/test_capi/test_object.py +++ b/Lib/test/test_capi/test_object.py @@ -1,4 +1,5 @@ import enum +import os import sys import textwrap import unittest @@ -13,6 +14,9 @@ _testcapi = import_helper.import_module('_testcapi') _testinternalcapi = import_helper.import_module('_testinternalcapi') +NULL = None +STDERR_FD = 2 + class Constant(enum.IntEnum): Py_CONSTANT_NONE = 0 @@ -247,5 +251,53 @@ def func(x): func(object()) + def pyobject_dump(self, obj, release_gil=False): + pyobject_dump = _testcapi.pyobject_dump + + try: + old_stderr = os.dup(STDERR_FD) + except OSError as exc: + # os.dup(STDERR_FD) is not supported on WASI + self.skipTest(f"os.dup() failed with {exc!r}") + + filename = os_helper.TESTFN + try: + try: + with open(filename, "wb") as fp: + fd = fp.fileno() + os.dup2(fd, STDERR_FD) + pyobject_dump(obj, release_gil) + finally: + os.dup2(old_stderr, STDERR_FD) + os.close(old_stderr) + + with open(filename) as fp: + return fp.read().rstrip() + finally: + os_helper.unlink(filename) + + def test_pyobject_dump(self): + # test string object + str_obj = 'test string' + output = self.pyobject_dump(str_obj) + hex_regex = r'(0x)?[0-9a-fA-F]+' + regex = ( + fr"object address : {hex_regex}\n" + r"object refcount : [0-9]+\n" + fr"object type : {hex_regex}\n" + r"object type name: str\n" + r"object repr : 'test string'" + ) + self.assertRegex(output, regex) + + # release the GIL + output = self.pyobject_dump(str_obj, release_gil=True) + self.assertRegex(output, regex) + + # test NULL object + output = self.pyobject_dump(NULL) + self.assertRegex(output, r'') + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 4e94f62d35e..51234a2e40f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -40,6 +40,17 @@ def get_first_executor(func): pass return None +def get_all_executors(func): + code = func.__code__ + co_code = code.co_code + executors = [] + for i in range(0, len(co_code), 2): + try: + executors.append(_opcode.get_executor(code, i)) + except ValueError: + pass + return executors + def iter_opnames(ex): for item in ex: @@ -422,32 +433,6 @@ def testfunc(n, m): uops = get_opnames(ex) self.assertIn("_FOR_ITER_TIER_TWO", uops) - def test_confidence_score(self): - def testfunc(n): - bits = 0 - for i in range(n): - if i & 0x01: - bits += 1 - if i & 0x02: - bits += 1 - if i&0x04: - bits += 1 - if i&0x08: - bits += 1 - if i&0x10: - bits += 1 - return bits - - x = testfunc(TIER2_THRESHOLD * 2) - - self.assertEqual(x, TIER2_THRESHOLD * 5) - ex = get_first_executor(testfunc) - self.assertIsNotNone(ex) - ops = list(iter_opnames(ex)) - #Since branch is 50/50 the trace could go either way. - count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP") - self.assertLessEqual(count, 2) - @requires_specialization @unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @@ -847,38 +832,7 @@ def testfunc(n): self.assertLessEqual(len(guard_nos_unicode_count), 1) self.assertIn("_COMPARE_OP_STR", uops) - def test_type_inconsistency(self): - ns = {} - src = textwrap.dedent(""" - def testfunc(n): - for i in range(n): - x = _test_global + _test_global - """) - exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - ns['_test_global'] = 1 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNotNone(ex) - uops = get_opnames(ex) - self.assertNotIn("_GUARD_TOS_INT", uops) - self.assertNotIn("_GUARD_NOS_INT", uops) - self.assertNotIn("_BINARY_OP_ADD_INT", uops) - self.assertNotIn("_POP_TWO_LOAD_CONST_INLINE_BORROW", uops) - # Try again, but between the runs, set the global to a float. - # This should result in no executor the second time. - ns = {} - exec(src, ns, ns) - testfunc = ns['testfunc'] - ns['_test_global'] = 0 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - ns['_test_global'] = 3.14 - _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD - 1) - self.assertIsNone(ex) - + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_sequential(self): def dummy12(x): return x - 1 @@ -907,6 +861,7 @@ def testfunc(n): largest_stack = _testinternalcapi.get_co_framesize(dummy13.__code__) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_nested(self): def dummy12(x): return x + 3 @@ -937,6 +892,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_several_calls(self): def dummy12(x): return x + 3 @@ -972,6 +928,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_several_calls_different_order(self): # same as `several_calls` but with top-level calls reversed def dummy12(x): @@ -1008,6 +965,7 @@ def testfunc(n): ) self.assertIn(("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_complex(self): def dummy0(x): return x @@ -1057,6 +1015,7 @@ def testfunc(n): ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands ) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_large_framesize(self): # Create a function with a large framesize. This ensures _CHECK_STACK_SPACE is # actually doing its job. Note that the resulting trace hits @@ -1118,6 +1077,7 @@ def testfunc(n): ("_CHECK_STACK_SPACE_OPERAND", largest_stack), uops_and_operands ) + @unittest.skip("gh-139109 WIP") def test_combine_stack_space_checks_recursion(self): def dummy15(x): while x > 0: @@ -2520,8 +2480,6 @@ def testfunc(n): testfunc(_testinternalcapi.TIER2_THRESHOLD) - ex = get_first_executor(testfunc) - assert ex is not None """)) def test_pop_top_specialize_none(self): @@ -2660,6 +2618,82 @@ def f(): f() + def test_interpreter_finalization_with_generator_alive(self): + script_helper.assert_python_ok("-c", textwrap.dedent(""" + import sys + t = tuple(range(%d)) + def simple_for(): + for x in t: + x + + def gen(): + try: + yield + except: + simple_for() + + sys.settrace(lambda *args: None) + simple_for() + g = gen() + next(g) + """ % _testinternalcapi.SPECIALIZATION_THRESHOLD)) + + def test_executor_side_exits_create_another_executor(self): + def f(): + for x in range(TIER2_THRESHOLD + 3): + for y in range(TIER2_THRESHOLD + 3): + z = x + y + + f() + all_executors = get_all_executors(f) + # Inner loop warms up first. + # Outer loop warms up later, linking to the inner one. + # Therefore, we have at least two executors. + self.assertGreaterEqual(len(all_executors), 2) + for executor in all_executors: + opnames = list(get_opnames(executor)) + # Assert all executors first terminator ends in + # _EXIT_TRACE or _JUMP_TO_TOP, not _DEOPT + for idx, op in enumerate(opnames): + if op == "_EXIT_TRACE" or op == "_JUMP_TO_TOP": + break + elif op == "_DEOPT": + self.fail(f"_DEOPT encountered first at executor" + f" {executor} at offset {idx} rather" + f" than expected _EXIT_TRACE") + + def test_enter_executor_valid_op_arg(self): + script_helper.assert_python_ok("-c", textwrap.dedent(""" + import sys + sys.setrecursionlimit(30) # reduce time of the run + + str_v1 = '' + tuple_v2 = (None, None, None, None, None) + small_int_v3 = 4 + + def f1(): + + for _ in range(10): + abs(0) + + tuple_v2[small_int_v3] + tuple_v2[small_int_v3] + tuple_v2[small_int_v3] + + def recursive_wrapper_4569(): + str_v1 > str_v1 + str_v1 > str_v1 + str_v1 > str_v1 + recursive_wrapper_4569() + + recursive_wrapper_4569() + + for i_f1 in range(19000): + try: + f1() + except RecursionError: + pass + """)) def global_identity(x): diff --git a/Lib/test/test_capi/test_type.py b/Lib/test/test_capi/test_type.py index 93874fbee32..e6a8ef9eed6 100644 --- a/Lib/test/test_capi/test_type.py +++ b/Lib/test/test_capi/test_type.py @@ -195,6 +195,24 @@ class H2(int): pass with self.assertRaises(TypeError): _testcapi.pytype_getmodulebydef(H2) + def test_get_module_by_token(self): + token = _testcapi.pymodule_get_token(_testcapi) + + heaptype = _testcapi.create_type_with_token('_testcapi.H', 0) + mod = _testcapi.pytype_getmodulebytoken(heaptype, token) + self.assertIs(mod, _testcapi) + + class H1(heaptype): pass + mod = _testcapi.pytype_getmodulebytoken(H1, token) + self.assertIs(mod, _testcapi) + + with self.assertRaises(TypeError): + _testcapi.pytype_getmodulebytoken(int, token) + + class H2(int): pass + with self.assertRaises(TypeError): + _testcapi.pytype_getmodulebytoken(H2, token) + def test_freeze(self): # test PyType_Freeze() type_freeze = _testcapi.type_freeze diff --git a/Lib/test/test_cext/__init__.py b/Lib/test/test_cext/__init__.py index fb93c6ccbb6..a52c2241f5d 100644 --- a/Lib/test/test_cext/__init__.py +++ b/Lib/test/test_cext/__init__.py @@ -14,7 +14,6 @@ SOURCES = [ os.path.join(os.path.dirname(__file__), 'extension.c'), - os.path.join(os.path.dirname(__file__), 'create_moduledef.c'), ] SETUP = os.path.join(os.path.dirname(__file__), 'setup.py') diff --git a/Lib/test/test_cext/create_moduledef.c b/Lib/test/test_cext/create_moduledef.c deleted file mode 100644 index 249c3163552..00000000000 --- a/Lib/test/test_cext/create_moduledef.c +++ /dev/null @@ -1,29 +0,0 @@ -// Workaround for testing _Py_OPAQUE_PYOBJECT. -// See end of 'extension.c' - - -#undef _Py_OPAQUE_PYOBJECT -#undef Py_LIMITED_API -#include "Python.h" - - -// (repeated definition to avoid creating a header) -extern PyObject *testcext_create_moduledef( - const char *name, const char *doc, - PyMethodDef *methods, PyModuleDef_Slot *slots); - -PyObject *testcext_create_moduledef( - const char *name, const char *doc, - PyMethodDef *methods, PyModuleDef_Slot *slots) { - - static struct PyModuleDef _testcext_module = { - PyModuleDef_HEAD_INIT, - }; - if (!_testcext_module.m_name) { - _testcext_module.m_name = name; - _testcext_module.m_doc = doc; - _testcext_module.m_methods = methods; - _testcext_module.m_slots = slots; - } - return PyModuleDef_Init(&_testcext_module); -} diff --git a/Lib/test/test_cext/extension.c b/Lib/test/test_cext/extension.c index 73fc67ae59d..0f668c1da32 100644 --- a/Lib/test/test_cext/extension.c +++ b/Lib/test/test_cext/extension.c @@ -78,7 +78,7 @@ _testcext_exec( return 0; } -#define _FUNC_NAME(NAME) PyInit_ ## NAME +#define _FUNC_NAME(NAME) PyModExport_ ## NAME #define FUNC_NAME(NAME) _FUNC_NAME(NAME) // Converting from function pointer to void* has undefined behavior, but @@ -88,58 +88,40 @@ _testcext_exec( _Py_COMP_DIAG_PUSH #if defined(__GNUC__) #pragma GCC diagnostic ignored "-Wpedantic" +#pragma GCC diagnostic ignored "-Wcast-qual" #elif defined(__clang__) #pragma clang diagnostic ignored "-Wpedantic" +#pragma clang diagnostic ignored "-Wcast-qual" #endif +PyDoc_STRVAR(_testcext_doc, "C test extension."); + static PyModuleDef_Slot _testcext_slots[] = { + {Py_mod_name, STR(MODULE_NAME)}, + {Py_mod_doc, (void*)(char*)_testcext_doc}, {Py_mod_exec, (void*)_testcext_exec}, + {Py_mod_methods, _testcext_methods}, {0, NULL} }; _Py_COMP_DIAG_POP -PyDoc_STRVAR(_testcext_doc, "C test extension."); - -#ifndef _Py_OPAQUE_PYOBJECT - -static struct PyModuleDef _testcext_module = { - PyModuleDef_HEAD_INIT, // m_base - STR(MODULE_NAME), // m_name - _testcext_doc, // m_doc - 0, // m_size - _testcext_methods, // m_methods - _testcext_slots, // m_slots - NULL, // m_traverse - NULL, // m_clear - NULL, // m_free -}; - - -PyMODINIT_FUNC +PyMODEXPORT_FUNC FUNC_NAME(MODULE_NAME)(void) { - return PyModuleDef_Init(&_testcext_module); + return _testcext_slots; } -#else // _Py_OPAQUE_PYOBJECT - -// Opaque PyObject means that PyModuleDef is also opaque and cannot be -// declared statically. See PEP 793. -// So, this part of module creation is split into a separate source file -// which uses non-limited API. - -// (repeated definition to avoid creating a header) -extern PyObject *testcext_create_moduledef( - const char *name, const char *doc, - PyMethodDef *methods, PyModuleDef_Slot *slots); +// Also define the soft-deprecated entrypoint to ensure it isn't called +#define _INITFUNC_NAME(NAME) PyInit_ ## NAME +#define INITFUNC_NAME(NAME) _INITFUNC_NAME(NAME) PyMODINIT_FUNC -FUNC_NAME(MODULE_NAME)(void) +INITFUNC_NAME(MODULE_NAME)(void) { - return testcext_create_moduledef( - STR(MODULE_NAME), _testcext_doc, _testcext_methods, _testcext_slots); + PyErr_SetString( + PyExc_AssertionError, + "PyInit_* function called while a PyModExport_* one is available"); + return NULL; } - -#endif // _Py_OPAQUE_PYOBJECT diff --git a/Lib/test/test_cext/setup.py b/Lib/test/test_cext/setup.py index 4d71e4751f7..67dfddec751 100644 --- a/Lib/test/test_cext/setup.py +++ b/Lib/test/test_cext/setup.py @@ -99,7 +99,6 @@ def main(): # Define _Py_OPAQUE_PYOBJECT macro if opaque_pyobject: cflags.append(f'-D_Py_OPAQUE_PYOBJECT') - sources.append('create_moduledef.c') if internal: cflags.append('-DTEST_INTERNAL_C_API=1') diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index e0dbb062eb0..e71f9fc181b 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -4,6 +4,7 @@ from functools import partial from test import support, test_tools +from test.support import force_not_colorized_test_class from test.support import os_helper from test.support.os_helper import TESTFN, unlink, rmtree from textwrap import dedent @@ -2758,6 +2759,7 @@ def test_allow_negative_accepted_by_py_ssize_t_converter_only(self): with self.assertRaisesRegex((AssertionError, TypeError), errmsg): self.parse_function(block) +@force_not_colorized_test_class class ClinicExternalTest(TestCase): maxDiff = None diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index f8115cc8300..cc1a625a509 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -814,15 +814,26 @@ def test_filter_syntax_warnings_by_module(self): filename = support.findfile('test_import/data/syntax_warnings.py') rc, out, err = assert_python_ok( '-Werror', - '-Walways:::test.test_import.data.syntax_warnings', + '-Walways:::__main__', + '-Werror:::test.test_import.data.syntax_warnings', + '-Werror:::syntax_warnings', filename) self.assertEqual(err.count(b': SyntaxWarning: '), 6) - rc, out, err = assert_python_ok( - '-Werror', - '-Walways:::syntax_warnings', - filename) - self.assertEqual(err.count(b': SyntaxWarning: '), 6) + def test_zipfile_run_filter_syntax_warnings_by_module(self): + filename = support.findfile('test_import/data/syntax_warnings.py') + with open(filename, 'rb') as f: + source = f.read() + with os_helper.temp_dir() as script_dir: + zip_name, _ = make_zip_pkg( + script_dir, 'test_zip', 'test_pkg', '__main__', source) + rc, out, err = assert_python_ok( + '-Werror', + '-Walways:::__main__', + '-Werror:::test_pkg.__main__', + os.path.join(zip_name, 'test_pkg') + ) + self.assertEqual(err.count(b': SyntaxWarning: '), 12) def tearDownModule(): diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index c35a4508943..c31faec9ee5 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -13,6 +13,7 @@ from test import support from test.support import os_helper +from test.support import warnings_helper try: import _testlimitedcapi @@ -3886,15 +3887,14 @@ def search_function(encoding): self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa-8', 2, 3, 4)) self.assertEqual(codecs.lookup('TEST.AAA---8'), ('test.aaa---8', 2, 3, 4)) self.assertEqual(codecs.lookup('TEST.AAA 8'), ('test.aaa---8', 2, 3, 4)) - self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4)) self.assertEqual(codecs.lookup('TEST.AAA.8'), ('test.aaa.8', 2, 3, 4)) self.assertEqual(codecs.lookup('TEST.AAA...8'), ('test.aaa...8', 2, 3, 4)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(codecs.lookup('TEST.AAA\xe9\u20ac-8'), ('test.aaa\xe9\u20ac-8', 2, 3, 4)) def test_encodings_normalize_encoding(self): - # encodings.normalize_encoding() ignores non-ASCII characters. normalize = encodings.normalize_encoding self.assertEqual(normalize('utf_8'), 'utf_8') - self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8') self.assertEqual(normalize('utf 8'), 'utf_8') # encodings.normalize_encoding() doesn't convert # characters to lower case. @@ -3902,6 +3902,11 @@ def test_encodings_normalize_encoding(self): self.assertEqual(normalize('utf.8'), 'utf.8') self.assertEqual(normalize('utf...8'), 'utf...8') + # Non-ASCII *encoding* is deprecated. + msg = "Support for non-ascii encoding names will be removed in 3.17" + with warnings_helper.check_warnings((msg, DeprecationWarning)): + self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8') + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 9c2364491fe..30f21875b22 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -1759,6 +1759,16 @@ def test_filter_syntax_warnings_by_module(self): self.assertEqual(wm.filename, filename) self.assertIs(wm.category, SyntaxWarning) + with warnings.catch_warnings(record=True) as wlog: + warnings.simplefilter('error') + warnings.filterwarnings('always', module=r'package\.module\z') + warnings.filterwarnings('error', module=module_re) + compile(source, filename, 'exec', module='package.module') + self.assertEqual(sorted(wm.lineno for wm in wlog), [4, 7, 10, 13, 14, 21]) + for wm in wlog: + self.assertEqual(wm.filename, filename) + self.assertIs(wm.category, SyntaxWarning) + @support.subTests('src', [ textwrap.dedent(""" def f(): diff --git a/Lib/test/test_concurrent_futures/test_process_pool.py b/Lib/test/test_concurrent_futures/test_process_pool.py index 9685f980119..731419a48bd 100644 --- a/Lib/test/test_concurrent_futures/test_process_pool.py +++ b/Lib/test/test_concurrent_futures/test_process_pool.py @@ -106,6 +106,21 @@ def test_traceback(self): self.assertIn('raise RuntimeError(123) # some comment', f1.getvalue()) + def test_traceback_when_child_process_terminates_abruptly(self): + # gh-139462 enhancement - BrokenProcessPool exceptions + # should describe which process terminated. + exit_code = 99 + with self.executor_type(max_workers=1) as executor: + future = executor.submit(os._exit, exit_code) + with self.assertRaises(BrokenProcessPool) as bpe: + future.result() + + cause = bpe.exception.__cause__ + self.assertIsInstance(cause, futures.process._RemoteTraceback) + self.assertIn( + f"terminated abruptly with exit code {exit_code}", cause.tb + ) + @warnings_helper.ignore_fork_in_thread_deprecation_warnings() @hashlib_helper.requires_hashdigest('md5') def test_ressources_gced_in_workers(self): diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 467ec09d99e..cfef24727e8 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -672,7 +672,7 @@ def __eq__(self, other): def test_reduce_5tuple(self): class C(dict): def __reduce__(self): - return (C, (), self.__dict__, None, self.items()) + return (C, (), self.__dict__, None, iter(self.items())) def __eq__(self, other): return (dict(self) == dict(other) and self.__dict__ == other.__dict__) diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 6be6a7ae222..df79840088a 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -918,6 +918,14 @@ def test_dict_reader_fieldnames_accepts_list(self): reader = csv.DictReader(f, fieldnames) self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_reader_set_fieldnames(self): + fieldnames = ["a", "b", "c"] + f = StringIO() + reader = csv.DictReader(f) + self.assertIsNone(reader.fieldnames) + reader.fieldnames = fieldnames + self.assertEqual(reader.fieldnames, fieldnames) + def test_dict_writer_fieldnames_rejects_iter(self): fieldnames = ["a", "b", "c"] f = StringIO() @@ -933,6 +941,7 @@ def test_dict_writer_fieldnames_accepts_list(self): def test_dict_reader_fieldnames_is_optional(self): f = StringIO() reader = csv.DictReader(f, fieldnames=None) + self.assertIsNone(reader.fieldnames) def test_read_dict_fields(self): with TemporaryFile("w+", encoding="utf-8") as fileobj: @@ -1353,6 +1362,19 @@ class TestSniffer(unittest.TestCase): ghi\0jkl """ + sample15 = "\n\n\n" + sample16 = "abc\ndef\nghi" + + sample17 = ["letter,offset"] + sample17.extend(f"{chr(ord('a') + i)},{i}" for i in range(20)) + sample17.append("v,twenty_one") # 'u' was skipped + sample17 = '\n'.join(sample17) + + sample18 = ["letter,offset"] + sample18.extend(f"{chr(ord('a') + i)},{i}" for i in range(21)) + sample18.append("v,twenty_one") # 'u' was not skipped + sample18 = '\n'.join(sample18) + def test_issue43625(self): sniffer = csv.Sniffer() self.assertTrue(sniffer.has_header(self.sample12)) @@ -1374,6 +1396,11 @@ def test_has_header_regex_special_delimiter(self): self.assertIs(sniffer.has_header(self.sample8), False) self.assertIs(sniffer.has_header(self.header2 + self.sample8), True) + def test_has_header_checks_20_rows(self): + sniffer = csv.Sniffer() + self.assertFalse(sniffer.has_header(self.sample17)) + self.assertTrue(sniffer.has_header(self.sample18)) + def test_guess_quote_and_delimiter(self): sniffer = csv.Sniffer() for header in (";'123;4';", "'123;4';", ";'123;4'", "'123;4'"): @@ -1423,6 +1450,10 @@ def test_delimiters(self): self.assertEqual(dialect.quotechar, "'") dialect = sniffer.sniff(self.sample14) self.assertEqual(dialect.delimiter, '\0') + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample15) + self.assertRaisesRegex(csv.Error, "Could not determine delimiter", + sniffer.sniff, self.sample16) def test_doublequote(self): sniffer = csv.Sniffer() diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py index 6bf5e5b3e55..3b335429b98 100644 --- a/Lib/test/test_dataclasses/__init__.py +++ b/Lib/test/test_dataclasses/__init__.py @@ -927,6 +927,20 @@ class C: validate_class(C) + def test_incomplete_annotations(self): + # gh-142214 + @dataclass + class C: + "doc" # needed because otherwise we fetch the annotations at the wrong time + x: int + + C.__annotate__ = lambda _: {} + + self.assertEqual( + annotationlib.get_annotations(C.__init__), + {"return": None} + ) + def test_missing_default(self): # Test that MISSING works the same as a default not being # specified. @@ -2471,6 +2485,149 @@ def __init__(self, a): self.assertEqual(D(5).a, 10) +class TestInitAnnotate(unittest.TestCase): + # Tests for the generated __annotate__ function for __init__ + # See: https://github.com/python/cpython/issues/137530 + + def test_annotate_function(self): + # No forward references + @dataclass + class A: + a: int + + value_annos = annotationlib.get_annotations(A.__init__, format=annotationlib.Format.VALUE) + forwardref_annos = annotationlib.get_annotations(A.__init__, format=annotationlib.Format.FORWARDREF) + string_annos = annotationlib.get_annotations(A.__init__, format=annotationlib.Format.STRING) + + self.assertEqual(value_annos, {'a': int, 'return': None}) + self.assertEqual(forwardref_annos, {'a': int, 'return': None}) + self.assertEqual(string_annos, {'a': 'int', 'return': 'None'}) + + self.assertTrue(getattr(A.__init__.__annotate__, "__generated_by_dataclasses__")) + + def test_annotate_function_forwardref(self): + # With forward references + @dataclass + class B: + b: undefined + + # VALUE annotations should raise while unresolvable + with self.assertRaises(NameError): + _ = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.VALUE) + + forwardref_annos = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.FORWARDREF) + string_annos = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.STRING) + + self.assertEqual(forwardref_annos, {'b': support.EqualToForwardRef('undefined', owner=B, is_class=True), 'return': None}) + self.assertEqual(string_annos, {'b': 'undefined', 'return': 'None'}) + + # Now VALUE and FORWARDREF should resolve, STRING should be unchanged + undefined = int + + value_annos = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.VALUE) + forwardref_annos = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.FORWARDREF) + string_annos = annotationlib.get_annotations(B.__init__, format=annotationlib.Format.STRING) + + self.assertEqual(value_annos, {'b': int, 'return': None}) + self.assertEqual(forwardref_annos, {'b': int, 'return': None}) + self.assertEqual(string_annos, {'b': 'undefined', 'return': 'None'}) + + def test_annotate_function_init_false(self): + # Check `init=False` attributes don't get into the annotations of the __init__ function + @dataclass + class C: + c: str = field(init=False) + + self.assertEqual(annotationlib.get_annotations(C.__init__), {'return': None}) + + def test_annotate_function_contains_forwardref(self): + # Check string annotations on objects containing a ForwardRef + @dataclass + class D: + d: list[undefined] + + with self.assertRaises(NameError): + annotationlib.get_annotations(D.__init__) + + self.assertEqual( + annotationlib.get_annotations(D.__init__, format=annotationlib.Format.FORWARDREF), + {"d": list[support.EqualToForwardRef("undefined", is_class=True, owner=D)], "return": None} + ) + + self.assertEqual( + annotationlib.get_annotations(D.__init__, format=annotationlib.Format.STRING), + {"d": "list[undefined]", "return": "None"} + ) + + # Now test when it is defined + undefined = str + + # VALUE should now resolve + self.assertEqual( + annotationlib.get_annotations(D.__init__), + {"d": list[str], "return": None} + ) + + self.assertEqual( + annotationlib.get_annotations(D.__init__, format=annotationlib.Format.FORWARDREF), + {"d": list[str], "return": None} + ) + + self.assertEqual( + annotationlib.get_annotations(D.__init__, format=annotationlib.Format.STRING), + {"d": "list[undefined]", "return": "None"} + ) + + def test_annotate_function_not_replaced(self): + # Check that __annotate__ is not replaced on non-generated __init__ functions + @dataclass(slots=True) + class E: + x: str + def __init__(self, x: int) -> None: + self.x = x + + self.assertEqual( + annotationlib.get_annotations(E.__init__), {"x": int, "return": None} + ) + + self.assertFalse(hasattr(E.__init__.__annotate__, "__generated_by_dataclasses__")) + + def test_slots_true_init_false(self): + # Test that slots=True and init=False work together and + # that __annotate__ is not added to __init__. + + @dataclass(slots=True, init=False) + class F: + x: int + + f = F() + f.x = 10 + self.assertEqual(f.x, 10) + + self.assertFalse(hasattr(F.__init__, "__annotate__")) + + def test_init_false_forwardref(self): + # Test forward references in fields not required for __init__ annotations. + + # At the moment this raises a NameError for VALUE annotations even though the + # undefined annotation is not required for the __init__ annotations. + # Ideally this will be fixed but currently there is no good way to resolve this + + @dataclass + class F: + not_in_init: list[undefined] = field(init=False, default=None) + in_init: int + + annos = annotationlib.get_annotations(F.__init__, format=annotationlib.Format.FORWARDREF) + self.assertEqual( + annos, + {"in_init": int, "return": None}, + ) + + with self.assertRaises(NameError): + annos = annotationlib.get_annotations(F.__init__) # NameError on not_in_init + + class TestRepr(unittest.TestCase): def test_repr(self): @dataclass @@ -3831,7 +3988,15 @@ def method(self) -> int: return SlotsTest - for make in (make_simple, make_with_annotations, make_with_annotations_and_method): + def make_with_forwardref(): + @dataclass(slots=True) + class SlotsTest: + x: undefined + y: list[undefined] + + return SlotsTest + + for make in (make_simple, make_with_annotations, make_with_annotations_and_method, make_with_forwardref): with self.subTest(make=make): C = make() support.gc_collect() diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index 14f94285d3f..82a48ad4d1a 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -1329,18 +1329,17 @@ class D(object): self.assertNotHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) class W(object): __slots__ = ["__weakref__"] a = W() self.assertHasAttr(a, "__weakref__") self.assertNotHasAttr(a, "__dict__") - try: + with self.assertRaises(AttributeError): a.foo = 42 - except AttributeError: - pass - else: - self.fail("shouldn't be allowed to set a.foo") + self.assertIs(weakref.ref(a)(), a) class C1(W, D): __slots__ = [] @@ -1349,6 +1348,7 @@ class C1(W, D): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) class C2(D, W): __slots__ = [] @@ -1357,6 +1357,77 @@ class C2(D, W): self.assertHasAttr(a, "__weakref__") a.foo = 42 self.assertEqual(a.__dict__, {"foo": 42}) + self.assertIs(weakref.ref(a)(), a) + + @unittest.skipIf(_testcapi is None, 'need the _testcapi module') + def test_slots_special_before_items(self): + class D(_testcapi.HeapCCollection): + __slots__ = ["__dict__"] + a = D(1, 2, 3) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(list(a), [1, 2, 3]) + + class W(_testcapi.HeapCCollection): + __slots__ = ["__weakref__"] + a = W(1, 2, 3) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['x'] + + with self.assertRaises(TypeError): + class X(_testcapi.HeapCCollection): + __slots__ = ['__dict__', 'x'] + + @support.subTests(('base', 'arg'), [ + (tuple, (1, 2, 3)), + (int, 9876543210**2), + (bytes, b'ab'), + ]) + def test_slots_special_after_items(self, base, arg): + class D(base): + __slots__ = ["__dict__"] + a = D(arg) + self.assertHasAttr(a, "__dict__") + self.assertNotHasAttr(a, "__weakref__") + a.foo = 42 + self.assertEqual(a.__dict__, {"foo": 42}) + with self.assertRaises(TypeError): + weakref.ref(a) + del a.__dict__ + self.assertNotHasAttr(a, "foo") + self.assertEqual(a.__dict__, {}) + self.assertEqual(a, base(arg)) + + class W(base): + __slots__ = ["__weakref__"] + a = W(arg) + self.assertHasAttr(a, "__weakref__") + self.assertNotHasAttr(a, "__dict__") + with self.assertRaises(AttributeError): + a.foo = 42 + self.assertIs(weakref.ref(a)(), a) + self.assertEqual(a, base(arg)) + + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['x'] + with self.assertRaises(TypeError): + class X(base): + __slots__ = ['__dict__', 'x'] def test_slots_special2(self): # Testing __qualname__ and __classcell__ in __slots__ diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 2e6c2bbdf19..665b3e843dd 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1621,6 +1621,14 @@ def __eq__(self, other): self.assertEqual(len(d), 1) + def test_split_table_update_with_str_subclass(self): + class MyStr(str): pass + class MyClass: pass + obj = MyClass() + obj.attr = 1 + obj.__dict__[MyStr('attr')] = 2 + self.assertEqual(obj.attr, 2) + class CAPITest(unittest.TestCase): diff --git a/Lib/test/test_email/data/msg_35.txt b/Lib/test/test_email/data/msg_35.txt index be7d5a2f7b9..0e2bbcaf718 100644 --- a/Lib/test/test_email/data/msg_35.txt +++ b/Lib/test/test_email/data/msg_35.txt @@ -1,4 +1,4 @@ From: aperson@dom.ain To: bperson@dom.ain Subject: here's something interesting -counter to RFC 2822, there's no separating newline here +counter to RFC 5322, there's no separating newline here diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b458d3f0efa..4020f1041c4 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -41,6 +41,7 @@ from test import support from test.support import threading_helper +from test.support import warnings_helper from test.support.os_helper import unlink from test.test_email import openfile, TestEmailBase @@ -2373,7 +2374,7 @@ def test_no_separating_blank_line(self): To: bperson@dom.ain Subject: here's something interesting -counter to RFC 2822, there's no separating newline here +counter to RFC 5322, there's no separating newline here """) # test_defect_handling @@ -2529,49 +2530,49 @@ def test_rfc2047_Q_invalid_digits(self): [(b'andr\xe9=zz', 'iso-8859-1')]) def test_rfc2047_rfc2047_1(self): - # 1st testcase at end of rfc2047 + # 1st testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_2(self): - # 2nd testcase at end of rfc2047 + # 2nd testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= b)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]) def test_rfc2047_rfc2047_3(self): - # 3rd testcase at end of rfc2047 + # 3rd testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_4(self): - # 4th testcase at end of rfc2047 + # 4th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_5a(self): - # 5th testcase at end of rfc2047 newline is \r\n + # 5th testcase at end of RFC 2047 newline is \r\n s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_5b(self): - # 5th testcase at end of rfc2047 newline is \n + # 5th testcase at end of RFC 2047 newline is \n s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_6(self): - # 6th testcase at end of rfc2047 + # 6th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a_b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]) def test_rfc2047_rfc2047_7(self): - # 7th testcase at end of rfc2047 + # 7th testcase at end of RFC 2047 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)' self.assertEqual(decode_header(s), [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'), @@ -3273,8 +3274,8 @@ def test_parsedate_y2k(self): """Test for parsing a date with a two-digit year. Parsing a date with a two-digit year should return the correct - four-digit year. RFC822 allows two-digit years, but RFC2822 (which - obsoletes RFC822) requires four-digit years. + four-digit year. RFC 822 allows two-digit years, but RFC 5322 (which + obsoletes RFC 2822, which obsoletes RFC 822) requires four-digit years. """ self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'), @@ -3325,7 +3326,7 @@ def test_escape_backslashes(self): self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b)) def test_quotes_unicode_names(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -3335,7 +3336,7 @@ def test_quotes_unicode_names(self): latin1_quopri) def test_accepts_any_charset_like_object(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= " @@ -3350,7 +3351,7 @@ def header_encode(self, string): utf8_base64) def test_invalid_charset_like_object_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. name = "H\u00e4ns W\u00fcrst" addr = 'person@dom.ain' # An object without a header_encode method: @@ -3359,7 +3360,7 @@ def test_invalid_charset_like_object_raises_error(self): bad_charset) def test_unicode_address_raises_error(self): - # issue 1690608. email.utils.formataddr() should be rfc2047 aware. + # issue 1690608. email.utils.formataddr() should be RFC 2047 aware. addr = 'pers\u00f6n@dom.in' self.assertRaises(UnicodeError, utils.formataddr, (None, addr)) self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr)) @@ -3380,7 +3381,7 @@ def test_parseaddr_preserves_quoted_pairs_in_addresses(self): # string containing a quoted backslash, followed by 'example' and two # backslashes, followed by another quoted string containing a space and # the word 'example'. parseaddr copies those two backslashes - # literally. Per rfc5322 this is not technically correct since a \ may + # literally. Per RFC 5322 this is not technically correct since a \ may # not appear in an address outside of a quoted string. It is probably # a sensible Postel interpretation, though. eq = self.assertEqual @@ -3392,12 +3393,12 @@ def test_parseaddr_preserves_quoted_pairs_in_addresses(self): ('', '"\\\\"example\\\\" example"@example.com')) def test_parseaddr_preserves_spaces_in_local_part(self): - # issue 9286. A normal RFC5322 local part should not contain any + # issue 9286. A normal RFC 5322 local part should not contain any # folding white space, but legacy local parts can (they are a sequence # of atoms, not dotatoms). On the other hand we strip whitespace from # before the @ and around dots, on the assumption that the whitespace # around the punctuation is a mistake in what would otherwise be - # an RFC5322 local part. Leading whitespace is, usual, stripped as well. + # an RFC 5322 local part. Leading whitespace is, usual, stripped as well. self.assertEqual(('', "merwok wok@xample.com"), utils.parseaddr("merwok wok@xample.com")) self.assertEqual(('', "merwok wok@xample.com"), @@ -5738,7 +5739,8 @@ def test_rfc2231_bad_character_in_encoding(self): """ msg = email.message_from_string(m) - self.assertEqual(msg.get_filename(), 'myfile.txt') + with warnings_helper.check_warnings(('', DeprecationWarning)): + self.assertEqual(msg.get_filename(), 'myfile.txt') def test_rfc2231_single_tick_in_filename_extended(self): eq = self.assertEqual diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index ff7a6da644d..7138aa4c556 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -8,6 +8,7 @@ from email import headerregistry from email.headerregistry import Address, Group from test.support import ALWAYS_EQ +from test.support import warnings_helper DITTO = object() @@ -247,7 +248,15 @@ def content_type_as_value(self, decoded = args[2] if l>2 and args[2] is not DITTO else source header = 'Content-Type:' + ' ' if source else '' folded = args[3] if l>3 else header + decoded + '\n' - h = self.make_header('Content-Type', source) + # Both rfc2231 test cases with utf-8%E2%80%9D raise warnings, + # clear encoding cache to ensure test isolation. + if 'utf-8%E2%80%9D' in source and 'ascii' not in source: + import encodings + encodings._cache.clear() + with warnings_helper.check_warnings(('', DeprecationWarning)): + h = self.make_header('Content-Type', source) + else: + h = self.make_header('Content-Type', source) self.assertEqual(h.content_type, content_type) self.assertEqual(h.maintype, maintype) self.assertEqual(h.subtype, subtype) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 1933f691a78..b5367941227 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -239,6 +239,37 @@ def test_repeated_init_and_inittab(self): lines = "\n".join(lines) + "\n" self.assertEqual(out, lines) + def test_create_module_from_initfunc(self): + out, err = self.run_embedded_interpreter("test_create_module_from_initfunc") + self.assertEqual(self._nogil_filtered_err(err, "embedded_ext"), "") + self.assertEqual(out, + "\n" + "my_test_extension.executed='yes'\n" + "my_test_extension.exec_slot_ran='yes'\n" + "\n" + "embedded_ext.executed='yes'\n" + ) + + def test_inittab_submodule_multiphase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_multiphase") + self.assertEqual(err, "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + "mp_pkg.mp_submod.mp_submod_exec_slot_ran='yes'\n" + "mp_pkg.mp_pkg_exec_slot_ran='yes'\n" + ) + + def test_inittab_submodule_singlephase(self): + out, err = self.run_embedded_interpreter("test_inittab_submodule_singlephase") + self.assertEqual(self._nogil_filtered_err(err, "sp_pkg"), "") + self.assertEqual(out, + "\n" + "\n" + "Hello from sub-module\n" + ) + def test_forced_io_encoding(self): # Checks forced configuration of embedded interpreter IO streams env = dict(os.environ, PYTHONIOENCODING="utf-8:surrogateescape") @@ -516,6 +547,24 @@ def test_getargs_reset_static_parser(self): out, err = self.run_embedded_interpreter("test_repeated_init_exec", code) self.assertEqual(out, '1\n2\n3\n' * INIT_LOOPS) + @staticmethod + def _nogil_filtered_err(err: str, mod_name: str) -> str: + if not support.Py_GIL_DISABLED: + return err + + # the test imports a singlephase init extension, so it emits a warning + # under the free-threaded build + expected_runtime_warning = ( + "RuntimeWarning: The global interpreter lock (GIL)" + f" has been enabled to load module '{mod_name}'" + ) + filtered_err_lines = [ + line + for line in err.strip().splitlines() + if expected_runtime_warning not in line + ] + return "\n".join(filtered_err_lines) + def config_dev_mode(preconfig, config): preconfig['allocator'] = PYMEM_ALLOCATOR_DEBUG diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 5262b58908a..6f212d2f91e 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1923,6 +1923,39 @@ def test_keyerror_context(self): exc2 = None + @cpython_only + # Python built with Py_TRACE_REFS fail with a fatal error in + # _PyRefchain_Trace() on memory allocation error. + @unittest.skipIf(support.Py_TRACE_REFS, 'cannot test Py_TRACE_REFS build') + def test_exec_set_nomemory_hang(self): + import_module("_testcapi") + # gh-134163: A MemoryError inside code that was wrapped by a try/except + # block would lead to an infinite loop. + + # The frame_lasti needs to be greater than 257 to prevent + # PyLong_FromLong() from returning cached integers, which + # don't require a memory allocation. Prepend some dummy code + # to artificially increase the instruction index. + warmup_code = "a = list(range(0, 1))\n" * 60 + user_input = warmup_code + dedent(""" + try: + import _testcapi + _testcapi.set_nomemory(0) + b = list(range(1000, 2000)) + except Exception as e: + import traceback + traceback.print_exc() + """) + with SuppressCrashReport(): + with script_helper.spawn_python('-c', user_input) as p: + p.wait() + output = p.stdout.read() + + self.assertIn(p.returncode, (0, 1)) + self.assertGreater(len(output), 0) # At minimum, should not hang + self.assertIn(b"MemoryError", output) + + class NameErrorTests(unittest.TestCase): def test_name_error_has_name(self): try: diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 01720457e61..7decd8f32d5 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -23,6 +23,12 @@ PROFILING_MODE_WALL = 0 PROFILING_MODE_CPU = 1 PROFILING_MODE_GIL = 2 +PROFILING_MODE_ALL = 3 + +# Thread status flags +THREAD_STATUS_HAS_GIL = (1 << 0) +THREAD_STATUS_ON_CPU = (1 << 1) +THREAD_STATUS_UNKNOWN = (1 << 2) try: from concurrent import interpreters @@ -153,6 +159,8 @@ def foo(): FrameInfo([script_name, 12, "baz"]), FrameInfo([script_name, 9, "bar"]), FrameInfo([threading.__file__, ANY, "Thread.run"]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]), ] # Is possible that there are more threads, so we check that the # expected stack traces are in the result (looking at you Windows!) @@ -1763,11 +1771,14 @@ def busy(): for thread_info in interpreter_info.threads: statuses[thread_info.thread_id] = thread_info.status - # Check if sleeper thread is idle and busy thread is running + # Check if sleeper thread is off CPU and busy thread is on CPU + # In the new flags system: + # - sleeper should NOT have ON_CPU flag (off CPU) + # - busy should have ON_CPU flag if (sleeper_tid in statuses and busy_tid in statuses and - statuses[sleeper_tid] == 1 and - statuses[busy_tid] == 0): + not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and + (statuses[busy_tid] & THREAD_STATUS_ON_CPU)): break time.sleep(0.5) # Give a bit of time to let threads settle except PermissionError: @@ -1779,8 +1790,8 @@ def busy(): self.assertIsNotNone(busy_tid, "Busy thread id not received") self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)") - self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU") finally: if client_socket is not None: @@ -1875,11 +1886,14 @@ def busy(): for thread_info in interpreter_info.threads: statuses[thread_info.thread_id] = thread_info.status - # Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running + # Check if sleeper thread doesn't have GIL and busy thread has GIL + # In the new flags system: + # - sleeper should NOT have HAS_GIL flag (waiting for GIL) + # - busy should have HAS_GIL flag if (sleeper_tid in statuses and busy_tid in statuses and - statuses[sleeper_tid] == 2 and - statuses[busy_tid] == 0): + not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and + (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): break time.sleep(0.5) # Give a bit of time to let threads settle except PermissionError: @@ -1891,8 +1905,8 @@ def busy(): self.assertIsNotNone(busy_tid, "Busy thread id not received") self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") - self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread should be idle (1)") - self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL") finally: if client_socket is not None: @@ -1900,6 +1914,128 @@ def busy(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) + @unittest.skipIf( + sys.platform not in ("linux", "darwin", "win32"), + "Test only runs on supported platforms (Linux, macOS, or Windows)", + ) + @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception") + def test_thread_status_all_mode_detection(self): + port = find_unused_port() + script = textwrap.dedent( + f"""\ + import socket + import threading + import time + import sys + + def sleeper_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"sleeper:" + str(threading.get_native_id()).encode()) + while True: + time.sleep(1) + + def busy_thread(): + conn = socket.create_connection(("localhost", {port})) + conn.sendall(b"busy:" + str(threading.get_native_id()).encode()) + while True: + sum(range(100000)) + + t1 = threading.Thread(target=sleeper_thread) + t2 = threading.Thread(target=busy_thread) + t1.start() + t2.start() + t1.join() + t2.join() + """ + ) + + with os_helper.temp_dir() as tmp_dir: + script_file = make_script(tmp_dir, "script", script) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(("localhost", port)) + server_socket.listen(2) + server_socket.settimeout(SHORT_TIMEOUT) + + p = subprocess.Popen( + [sys.executable, script_file], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + client_sockets = [] + try: + sleeper_tid = None + busy_tid = None + + # Receive thread IDs from the child process + for _ in range(2): + client_socket, _ = server_socket.accept() + client_sockets.append(client_socket) + line = client_socket.recv(1024) + if line: + if line.startswith(b"sleeper:"): + try: + sleeper_tid = int(line.split(b":")[-1]) + except Exception: + pass + elif line.startswith(b"busy:"): + try: + busy_tid = int(line.split(b":")[-1]) + except Exception: + pass + + server_socket.close() + + attempts = 10 + statuses = {} + try: + unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL, + skip_non_matching_threads=False) + for _ in range(attempts): + traces = unwinder.get_stack_trace() + # Find threads and their statuses + statuses = {} + for interpreter_info in traces: + for thread_info in interpreter_info.threads: + statuses[thread_info.thread_id] = thread_info.status + + # Check ALL mode provides both GIL and CPU info + # - sleeper should NOT have ON_CPU and NOT have HAS_GIL + # - busy should have ON_CPU and have HAS_GIL + if (sleeper_tid in statuses and + busy_tid in statuses and + not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and + not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and + (statuses[busy_tid] & THREAD_STATUS_ON_CPU) and + (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)): + break + time.sleep(0.5) + except PermissionError: + self.skipTest( + "Insufficient permissions to read the stack trace" + ) + + self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received") + self.assertIsNotNone(busy_tid, "Busy thread id not received") + self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads") + self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads") + + # Sleeper thread: off CPU, no GIL + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU") + self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL") + + # Busy thread: on CPU, has GIL + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU") + self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL") + + finally: + for client_socket in client_sockets: + client_socket.close() + p.terminate() + p.wait(timeout=SHORT_TIMEOUT) + p.stdout.close() + p.stderr.close() if __name__ == "__main__": diff --git a/Lib/test/test_free_threading/test_capi.py b/Lib/test/test_free_threading/test_capi.py new file mode 100644 index 00000000000..146d7cfc97a --- /dev/null +++ b/Lib/test/test_free_threading/test_capi.py @@ -0,0 +1,47 @@ +import ctypes +import sys +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +_PyImport_AddModuleRef = ctypes.pythonapi.PyImport_AddModuleRef +_PyImport_AddModuleRef.argtypes = (ctypes.c_char_p,) +_PyImport_AddModuleRef.restype = ctypes.py_object + + +@threading_helper.requires_working_threading() +class TestImportCAPI(unittest.TestCase): + def test_pyimport_addmoduleref_thread_safe(self): + # gh-137422: Concurrent calls to PyImport_AddModuleRef with the same + # module name must return the same module object. + + NUM_ITERS = 10 + NTHREADS = 4 + + module_name = f"test_free_threading_addmoduleref_{id(self)}" + module_name_bytes = module_name.encode() + sys.modules.pop(module_name, None) + results = [] + + def worker(): + module = _PyImport_AddModuleRef(module_name_bytes) + results.append(module) + + for _ in range(NUM_ITERS): + try: + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(len(results), NTHREADS) + reference = results[0] + for module in results[1:]: + self.assertIs(module, reference) + self.assertIn(module_name, sys.modules) + self.assertIs(sys.modules[module_name], reference) + finally: + results.clear() + sys.modules.pop(module_name, None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_csv.py b/Lib/test/test_free_threading/test_csv.py new file mode 100644 index 00000000000..beb4510a128 --- /dev/null +++ b/Lib/test/test_free_threading/test_csv.py @@ -0,0 +1,50 @@ +import csv +import io +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestCSV(unittest.TestCase): + def test_concurrent_reader_next(self): + input_rows = [f"{i},{i},{i}" for i in range(50)] + input_stream = io.StringIO("\n".join(input_rows)) + reader = csv.reader(input_stream) + output_rows = [] + + def read_row(): + for row in reader: + self.assertEqual(len(row), 3) + output_rows.append(",".join(row)) + + run_concurrently(worker_func=read_row, nthreads=NTHREADS) + self.assertSetEqual(set(input_rows), set(output_rows)) + + def test_concurrent_writer_writerow(self): + output_stream = io.StringIO() + writer = csv.writer(output_stream) + row_per_thread = 10 + expected_rows = [] + + def write_row(): + for i in range(row_per_thread): + writer.writerow([i, i, i]) + expected_rows.append(f"{i},{i},{i}") + + run_concurrently(worker_func=write_row, nthreads=NTHREADS) + + # Rewind to the start of the stream and parse the rows + output_stream.seek(0) + output_rows = [line.strip() for line in output_stream.readlines()] + + self.assertEqual(len(output_rows), NTHREADS * row_per_thread) + self.assertListEqual(sorted(output_rows), sorted(expected_rows)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_lzma.py b/Lib/test/test_free_threading/test_lzma.py new file mode 100644 index 00000000000..38d7e5db489 --- /dev/null +++ b/Lib/test/test_free_threading/test_lzma.py @@ -0,0 +1,56 @@ +import unittest + +from test.support import import_helper, threading_helper +from test.support.threading_helper import run_concurrently + +lzma = import_helper.import_module("lzma") +from lzma import LZMACompressor, LZMADecompressor + +from test.test_lzma import INPUT + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestLZMA(unittest.TestCase): + def test_compressor(self): + lzc = LZMACompressor() + + # First compress() outputs LZMA header + header = lzc.compress(INPUT) + self.assertGreater(len(header), 0) + + def worker(): + # it should return empty bytes as it buffers data internally + data = lzc.compress(INPUT) + self.assertEqual(data, b"") + + run_concurrently(worker_func=worker, nthreads=NTHREADS - 1) + full_compressed = header + lzc.flush() + decompressed = lzma.decompress(full_compressed) + # The decompressed data should be INPUT repeated NTHREADS times + self.assertEqual(decompressed, INPUT * NTHREADS) + + def test_decompressor(self): + chunk_size = 128 + chunks = [bytes([ord("a") + i]) * chunk_size for i in range(NTHREADS)] + input_data = b"".join(chunks) + compressed = lzma.compress(input_data) + + lzd = LZMADecompressor() + output = [] + + def worker(): + data = lzd.decompress(compressed, chunk_size) + self.assertEqual(len(data), chunk_size) + output.append(data) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(len(output), NTHREADS) + # Verify the expected chunks (order doesn't matter due to append race) + self.assertSetEqual(set(output), set(chunks)) + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py index 407bf7cbdee..2cd6e7b035e 100644 --- a/Lib/test/test_free_threading/test_monitoring.py +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -35,10 +35,10 @@ def work(self, n, funcs): return n return self.work(n - 1, funcs) + self.work(n - 2, funcs) - def start_work(self, n, funcs): + def start_work(self, n, funcs, barrier): # With the GIL builds we need to make sure that the hooks have # a chance to run as it's possible to run w/o releasing the GIL. - time.sleep(0.1) + barrier.wait() self.work(n, funcs) def after_test(self): @@ -53,14 +53,16 @@ def test_instrumentation(self): exec("def f(): pass", x) funcs.append(x["f"]) + barrier = Barrier(self.thread_count + 1) threads = [] for i in range(self.thread_count): # Each thread gets a copy of the func list to avoid contention - t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t = Thread(target=self.start_work, args=(self.fib, list(funcs), barrier)) t.start() threads.append(t) self.after_threads() + barrier.wait() while True: any_alive = False @@ -73,6 +75,9 @@ def test_instrumentation(self): break self.during_threads() + # Sleep to avoid setting monitoring events too rapidly and + # overflowing the global version counter + time.sleep(0.0001) self.after_test() @@ -117,7 +122,6 @@ class MonitoringMultiThreaded( def setUp(self): super().setUp() self.set = False - self.called = False monitoring.register_callback( self.tool_id, monitoring.events.LINE, self.callback ) @@ -127,10 +131,7 @@ def tearDown(self): super().tearDown() def callback(self, *args): - self.called = True - - def after_test(self): - self.assertTrue(self.called) + pass def during_threads(self): if self.set: @@ -148,16 +149,11 @@ class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.settrace(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -174,16 +170,11 @@ class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): sys.setprofile(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): @@ -200,16 +191,11 @@ class SetProfileAllThreadsMultiThreaded(InstrumentationMultiThreadedMixin, TestC def setUp(self): self.set = False - self.called = False - - def after_test(self): - self.assertTrue(self.called) def tearDown(self): threading.setprofile_all_threads(None) def trace_func(self, frame, event, arg): - self.called = True return self.trace_func def during_threads(self): diff --git a/Lib/test/test_free_threading/test_re.py b/Lib/test/test_free_threading/test_re.py new file mode 100644 index 00000000000..56f25045d1b --- /dev/null +++ b/Lib/test/test_free_threading/test_re.py @@ -0,0 +1,62 @@ +import re +import unittest + +from test.support import threading_helper +from test.support.threading_helper import run_concurrently + + +NTHREADS = 10 + + +@threading_helper.requires_working_threading() +class TestRe(unittest.TestCase): + def test_pattern_sub(self): + """Pattern substitution should work across threads""" + pattern = re.compile(r"\w+@\w+\.\w+") + text = "e-mail: test@python.org or user@pycon.org. " * 5 + results = [] + + def worker(): + substituted = pattern.sub("(redacted)", text) + results.append(substituted.count("(redacted)")) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 5] * NTHREADS) + + def test_pattern_search(self): + """Pattern search should work across threads.""" + emails = ["alice@python.org", "bob@pycon.org"] * 10 + pattern = re.compile(r"\w+@\w+\.\w+") + results = [] + + def worker(): + matches = [pattern.search(e).group() for e in emails] + results.append(len(matches)) + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + self.assertEqual(results, [2 * 10] * NTHREADS) + + def test_scanner_concurrent_access(self): + """Shared scanner should reject concurrent access.""" + pattern = re.compile(r"\w+") + scanner = pattern.scanner("word " * 10) + + def worker(): + for _ in range(100): + try: + scanner.search() + except ValueError as e: + if "already executing" in str(e): + pass + else: + raise + + run_concurrently(worker_func=worker, nthreads=NTHREADS) + # This test has no assertions. Its purpose is to catch crashes and + # enable thread sanitizer to detect race conditions. While "already + # executing" errors are very likely, they're not guaranteed due to + # non-deterministic thread scheduling, so we can't assert errors > 0. + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_free_threading/test_type.py b/Lib/test/test_free_threading/test_type.py index 2d995751005..1255d842dbf 100644 --- a/Lib/test/test_free_threading/test_type.py +++ b/Lib/test/test_free_threading/test_type.py @@ -141,6 +141,25 @@ def reader(): self.run_one(writer, reader) + def test_bases_change(self): + class BaseA: + pass + + class Derived(BaseA): + pass + + def writer(): + for _ in range(1000): + class BaseB: + pass + Derived.__bases__ = (BaseB,) + + def reader(): + for _ in range(1000): + Derived.__base__ + + self.run_one(writer, reader) + def run_one(self, writer_func, reader_func): barrier = threading.Barrier(NTHREADS) diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index ce9e7f6d57d..090926fd8d8 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -2785,7 +2785,7 @@ class Slot: @functools.singledispatchmethod @classmethod def go(cls, item, arg): - pass + return item - arg @go.register @classmethod @@ -2794,7 +2794,9 @@ def _(cls, item: int, arg): s = Slot() self.assertEqual(s.go(1, 1), 2) + self.assertEqual(s.go(1.5, 1), 0.5) self.assertEqual(Slot.go(1, 1), 2) + self.assertEqual(Slot.go(1.5, 1), 0.5) def test_staticmethod_slotted_class(self): class A: @@ -3485,6 +3487,37 @@ def _(item, arg: bytes) -> str: self.assertEqual(str(Signature.from_callable(A.static_func)), '(item, arg: int) -> str') + def test_method_non_descriptor(self): + class Callable: + def __init__(self, value): + self.value = value + def __call__(self, arg): + return self.value, arg + + class A: + t = functools.singledispatchmethod(Callable('general')) + t.register(int, Callable('special')) + + @functools.singledispatchmethod + def u(self, arg): + return 'general', arg + u.register(int, Callable('special')) + + v = functools.singledispatchmethod(Callable('general')) + @v.register(int) + def _(self, arg): + return 'special', arg + + a = A() + self.assertEqual(a.t(0), ('special', 0)) + self.assertEqual(a.t(2.5), ('general', 2.5)) + self.assertEqual(A.t(0), ('special', 0)) + self.assertEqual(A.t(2.5), ('general', 2.5)) + self.assertEqual(a.u(0), ('special', 0)) + self.assertEqual(a.u(2.5), ('general', 2.5)) + self.assertEqual(a.v(0), ('special', 0)) + self.assertEqual(a.v(2.5), ('general', 2.5)) + class CachedCostItem: _cost = 1 diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 10c3a622107..ec5df4d20e7 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -846,11 +846,15 @@ def test_get_stats(self): self.assertEqual(len(stats), 3) for st in stats: self.assertIsInstance(st, dict) - self.assertEqual(set(st), - {"collected", "collections", "uncollectable"}) + self.assertEqual( + set(st), + {"collected", "collections", "uncollectable", "candidates", "duration"} + ) self.assertGreaterEqual(st["collected"], 0) self.assertGreaterEqual(st["collections"], 0) self.assertGreaterEqual(st["uncollectable"], 0) + self.assertGreaterEqual(st["candidates"], 0) + self.assertGreaterEqual(st["duration"], 0) # Check that collection counts are incremented correctly if gc.isenabled(): self.addCleanup(gc.enable) @@ -861,11 +865,25 @@ def test_get_stats(self): self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"]) + self.assertGreater(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertEqual(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertGreaterEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertEqual(new[2][stat], old[2][stat]) gc.collect(2) - new = gc.get_stats() - self.assertEqual(new[0]["collections"], old[0]["collections"] + 1) + old, new = new, gc.get_stats() + self.assertEqual(new[0]["collections"], old[0]["collections"]) self.assertEqual(new[1]["collections"], old[1]["collections"]) self.assertEqual(new[2]["collections"], old[2]["collections"] + 1) + self.assertEqual(new[0]["duration"], old[0]["duration"]) + self.assertEqual(new[1]["duration"], old[1]["duration"]) + self.assertGreater(new[2]["duration"], old[2]["duration"]) + for stat in ["collected", "uncollectable", "candidates"]: + self.assertEqual(new[0][stat], old[0][stat]) + self.assertEqual(new[1][stat], old[1][stat]) + self.assertGreaterEqual(new[2][stat], old[2][stat]) def test_freeze(self): gc.freeze() @@ -1298,9 +1316,11 @@ def test_collect(self): # Check that we got the right info dict for all callbacks for v in self.visit: info = v[2] - self.assertTrue("generation" in info) - self.assertTrue("collected" in info) - self.assertTrue("uncollectable" in info) + self.assertIn("generation", info) + self.assertIn("collected", info) + self.assertIn("uncollectable", info) + self.assertIn("candidates", info) + self.assertIn("duration", info) def test_collect_generation(self): self.preclean() diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 09ce329bdcd..de0dbab480f 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -165,7 +165,7 @@ def test_inst_one_pop(self): value = stack_pointer[-1]; SPAM(value); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -190,7 +190,7 @@ def test_inst_one_push(self): res = SPAM(); stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -247,7 +247,7 @@ def test_binary_op(self): res = SPAM(left, right); stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -366,14 +366,14 @@ def test_sync_sp(self): _PyStackRef res; arg = stack_pointer[-1]; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); escaping_call(); stack_pointer = _PyFrame_GetStackPointer(frame); res = Py_None; stack_pointer[0] = res; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -489,7 +489,7 @@ def test_error_if_pop(self): res = 0; stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -523,7 +523,7 @@ def test_error_if_pop_with_result(self): } stack_pointer[-2] = res; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -553,7 +553,7 @@ def test_cache_effect(self): uint32_t extra = read_u32(&this_instr[2].cache); (void)extra; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -640,7 +640,7 @@ def test_macro_instruction(self): } stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } @@ -688,7 +688,7 @@ def test_macro_instruction(self): stack_pointer = _PyFrame_GetStackPointer(frame); stack_pointer[-3] = res; stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -827,7 +827,7 @@ def test_array_input(self): below = stack_pointer[-2 - oparg*2]; SPAM(values, oparg); stack_pointer += -2 - oparg*2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -860,7 +860,7 @@ def test_array_output(self): stack_pointer[-2] = below; stack_pointer[-1 + oparg*3] = above; stack_pointer += oparg*3; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -889,7 +889,7 @@ def test_array_input_output(self): above = 0; stack_pointer[0] = above; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -918,11 +918,11 @@ def test_array_error_if(self): extra = stack_pointer[-1 - oparg]; if (oparg == 0) { stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } stack_pointer += -1 - oparg; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -960,7 +960,7 @@ def test_macro_push_push(self): stack_pointer[0] = val1; stack_pointer[1] = val2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1263,13 +1263,13 @@ def test_flush(self): stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); // SECOND { USE(a, b); } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1325,7 +1325,7 @@ def test_pop_on_error_peeks(self): } } stack_pointer += -2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1368,14 +1368,14 @@ def test_push_then_error(self): stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); JUMP_TO_LABEL(error); } } stack_pointer[0] = a; stack_pointer[1] = b; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1661,7 +1661,7 @@ def test_pystackref_frompyobject_new_next_to_cmacro(self): stack_pointer[0] = out1; stack_pointer[1] = out2; stack_pointer += 2; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); DISPATCH(); } """ @@ -1881,7 +1881,7 @@ def test_reassigning_dead_inputs(self): stack_pointer = _PyFrame_GetStackPointer(frame); in = temp; stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); _PyFrame_SetStackPointer(frame, stack_pointer); PyStackRef_CLOSE(in); stack_pointer = _PyFrame_GetStackPointer(frame); @@ -2115,8 +2115,9 @@ def test_validate_uop_unused_input(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2132,8 +2133,9 @@ def test_validate_uop_unused_input(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(-1); stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2153,9 +2155,10 @@ def test_validate_uop_unused_output(self): case OP: { JitOptRef foo; foo = NULL; + CHECK_STACK_BOUNDS(1); stack_pointer[0] = foo; stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ @@ -2172,8 +2175,9 @@ def test_validate_uop_unused_output(self): """ output = """ case OP: { + CHECK_STACK_BOUNDS(1); stack_pointer += 1; - assert(WITHIN_STACK_BOUNDS()); + ASSERT_WITHIN_STACK_BOUNDS(__FILE__, __LINE__); break; } """ diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index f14a882d386..442d30fc970 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -11,7 +11,7 @@ import unittest from subprocess import PIPE, Popen from test.support import catch_unraisable_exception -from test.support import import_helper +from test.support import force_not_colorized_test_class, import_helper from test.support import os_helper from test.support import _4G, bigmemtest, requires_subprocess from test.support.script_helper import assert_python_ok, assert_python_failure @@ -1057,6 +1057,7 @@ def wrapper(*args, **kwargs): return decorator +@force_not_colorized_test_class class TestCommandLine(unittest.TestCase): data = b'This is a simple test with gzip' diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index 33845d8a9e2..489bb049d2f 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -40,12 +40,15 @@ openssl_hashlib = import_fresh_module('hashlib', fresh=['_hashlib']) try: - from _hashlib import HASH, HASHXOF, openssl_md_meth_names, get_fips_mode + import _hashlib except ImportError: - HASH = None - HASHXOF = None - openssl_md_meth_names = frozenset() - + _hashlib = None +# The extension module may exist but only define some of these. gh-141907 +HASH = getattr(_hashlib, 'HASH', None) +HASHXOF = getattr(_hashlib, 'HASHXOF', None) +openssl_md_meth_names = getattr(_hashlib, 'openssl_md_meth_names', frozenset()) +get_fips_mode = getattr(_hashlib, 'get_fips_mode', None) +if not get_fips_mode: def get_fips_mode(): return 0 @@ -631,9 +634,14 @@ def check_sha3(self, name, capacity, rate, suffix): constructors = self.constructors_to_test[name] for hash_object_constructor in constructors: m = hash_object_constructor() - if HASH is not None and isinstance(m, HASH): - # _hashopenssl's variant does not have extra SHA3 attributes - continue + if name.startswith('shake_'): + if HASHXOF is not None and isinstance(m, HASHXOF): + # _hashopenssl's variant does not have extra SHA3 attributes + continue + else: + if HASH is not None and isinstance(m, HASH): + # _hashopenssl's variant does not have extra SHA3 attributes + continue self.assertEqual(capacity + rate, 1600) self.assertEqual(m._capacity_bits, capacity) self.assertEqual(m._rate_bits, rate) @@ -1156,7 +1164,8 @@ def test_disallow_instantiation(self): def test_hash_disallow_instantiation(self): # internal types like _hashlib.HASH are not constructable support.check_disallow_instantiation(self, HASH) - support.check_disallow_instantiation(self, HASHXOF) + if HASHXOF is not None: + support.check_disallow_instantiation(self, HASHXOF) def test_readonly_types(self): for algorithm, constructors in self.constructors_to_test.items(): diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 19dde9362a4..e4eff1ea17a 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -109,12 +109,13 @@ def get_events(self): class TestCaseBase(unittest.TestCase): - def get_collector(self): - return EventCollector(convert_charrefs=False) + def get_collector(self, convert_charrefs=False): + return EventCollector(convert_charrefs=convert_charrefs) - def _run_check(self, source, expected_events, collector=None): + def _run_check(self, source, expected_events, + *, collector=None, convert_charrefs=False): if collector is None: - collector = self.get_collector() + collector = self.get_collector(convert_charrefs=convert_charrefs) parser = collector for s in source: parser.feed(s) @@ -128,7 +129,7 @@ def _run_check(self, source, expected_events, collector=None): def _run_check_extra(self, source, events): self._run_check(source, events, - EventCollectorExtra(convert_charrefs=False)) + collector=EventCollectorExtra(convert_charrefs=False)) class HTMLParserTestCase(TestCaseBase): @@ -187,10 +188,87 @@ def test_malformatted_charref(self): ]) def test_unclosed_entityref(self): - self._run_check("&entityref foo", [ - ("entityref", "entityref"), - ("data", " foo"), - ]) + self._run_check('> <', [('entityref', 'gt'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('> <', [('data', '> <')], convert_charrefs=True) + + self._run_check('&undefined <', + [('entityref', 'undefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('&undefined <', [('data', '&undefined <')], + convert_charrefs=True) + + self._run_check('>undefined <', + [('entityref', 'gtundefined'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('>undefined <', [('data', '>undefined <')], + convert_charrefs=True) + + self._run_check('& <', [('data', '& '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('& <', [('data', '& <')], convert_charrefs=True) + + def test_eof_in_entityref(self): + self._run_check('>', [('entityref', 'gt')], convert_charrefs=False) + self._run_check('>', [('data', '>')], convert_charrefs=True) + + self._run_check('&g', [('entityref', 'g')], convert_charrefs=False) + self._run_check('&g', [('data', '&g')], convert_charrefs=True) + + self._run_check('&undefined', [('entityref', 'undefined')], + convert_charrefs=False) + self._run_check('&undefined', [('data', '&undefined')], + convert_charrefs=True) + + self._run_check('>undefined', [('entityref', 'gtundefined')], + convert_charrefs=False) + self._run_check('>undefined', [('data', '>undefined')], + convert_charrefs=True) + + self._run_check('&', [('data', '&')], convert_charrefs=False) + self._run_check('&', [('data', '&')], convert_charrefs=True) + + def test_unclosed_charref(self): + self._run_check('{ <', [('charref', '123'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('{ <', [('data', '{ <')], convert_charrefs=True) + self._run_check('« <', [('charref', 'xab'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('« <', [('data', '\xab <')], convert_charrefs=True) + + self._run_check('� <', + [('charref', '123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + self._run_check('� <', + [('charref', 'x123456789'), ('data', ' '), ('entityref', 'lt')], + convert_charrefs=False) + self._run_check('� <', [('data', '\ufffd <')], + convert_charrefs=True) + + self._run_check('&# <', [('data', '&# '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&# <', [('data', '&# <')], convert_charrefs=True) + self._run_check('&#x <', [('data', '&#x '), ('entityref', 'lt')], convert_charrefs=False) + self._run_check('&#x <', [('data', '&#x <')], convert_charrefs=True) + + def test_eof_in_charref(self): + self._run_check('{', [('charref', '123')], convert_charrefs=False) + self._run_check('{', [('data', '{')], convert_charrefs=True) + self._run_check('«', [('charref', 'xab')], convert_charrefs=False) + self._run_check('«', [('data', '\xab')], convert_charrefs=True) + + self._run_check('�', [('charref', '123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + self._run_check('�', [('charref', 'x123456789')], + convert_charrefs=False) + self._run_check('�', [('data', '\ufffd')], convert_charrefs=True) + + self._run_check('&#', [('data', '&#')], convert_charrefs=False) + self._run_check('&#', [('data', '&#')], convert_charrefs=True) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=False) + self._run_check('&#x', [('data', '&#x')], convert_charrefs=True) def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping @@ -762,20 +840,6 @@ def test_correct_detection_of_start_tags(self): ] self._run_check(html, expected) - def test_EOF_in_charref(self): - # see #17802 - # This test checks that the UnboundLocalError reported in the issue - # is not raised, however I'm not sure the returned values are correct. - # Maybe HTMLParser should use self.unescape for these - data = [ - ('a&', [('data', 'a&')]), - ('a&b', [('data', 'ab')]), - ('a&b ', [('data', 'a'), ('entityref', 'b'), ('data', ' ')]), - ('a&b;', [('data', 'a'), ('entityref', 'b')]), - ] - for html, expected in data: - self._run_check(html, expected) - def test_eof_in_comments(self): data = [ ('