diff --git a/.azure-pipelines/ci.yml b/.azure-pipelines/ci.yml index 7490dd947e1..9b3415fd2b5 100644 --- a/.azure-pipelines/ci.yml +++ b/.azure-pipelines/ci.yml @@ -1,4 +1,4 @@ -trigger: ['main', '3.13', '3.12', '3.11', '3.10', '3.9', '3.8'] +trigger: ['main', '3.*'] jobs: - job: Prebuild diff --git a/.editorconfig b/.editorconfig index a6187d64f3c..5b04b32a89e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,6 +1,6 @@ root = true -[*.{py,c,cpp,h,js,rst,md,yml}] +[*.{py,c,cpp,h,js,rst,md,yml,yaml}] trim_trailing_whitespace = true insert_final_newline = true indent_style = space @@ -11,5 +11,5 @@ indent_size = 4 [*.rst] indent_size = 3 -[*.{js,yml}] +[*.{js,yml,yaml}] indent_size = 2 diff --git a/.gitattributes b/.gitattributes index 2f5a030981f..5682b9150a3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -10,6 +10,7 @@ *.ico binary *.jpg binary *.pck binary +*.pdf binary *.png binary *.psd binary *.tar binary @@ -67,6 +68,7 @@ PCbuild/readme.txt dos **/clinic/*.cpp.h generated **/clinic/*.h.h generated *_db.h generated +Doc/c-api/lifecycle.dot.svg generated Doc/data/stable_abi.dat generated Doc/library/token-list.inc generated Include/internal/pycore_ast.h generated diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 45d06317c26..88b95766982 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -30,6 +30,7 @@ Modules/Setup* @erlend-aasland Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra +Objects/unionobject.c @JelleZijlstra Objects/type* @markshannon Objects/codeobject.c @markshannon Objects/frameobject.c @markshannon @@ -167,6 +168,9 @@ Include/internal/pycore_time.h @pganssle @abalkin **/*imap* @python/email-team **/*poplib* @python/email-team +# Exclude .mailmap from being owned by @python/email-team +/.mailmap + # Garbage collector /Modules/gcmodule.c @pablogsal /Doc/library/gc.rst @pablogsal @@ -184,7 +188,7 @@ Include/internal/pycore_time.h @pganssle @abalkin # AST Python/ast.c 
@isidentical @JelleZijlstra @eclips4 -Python/ast_opt.c @isidentical @eclips4 +Python/ast_preprocess.c @isidentical @eclips4 Parser/asdl.py @isidentical @JelleZijlstra @eclips4 Parser/asdl_c.py @isidentical @JelleZijlstra @eclips4 Lib/ast.py @isidentical @JelleZijlstra @eclips4 @@ -294,7 +298,12 @@ Lib/test/test_interpreters/ @ericsnowcurrently **/*-ios* @freakboy3742 # WebAssembly -/Tools/wasm/ @brettcannon @freakboy3742 +Tools/wasm/config.site-wasm32-emscripten @freakboy3742 +/Tools/wasm/README.md @brettcannon @freakboy3742 +/Tools/wasm/wasi-env @brettcannon +/Tools/wasm/wasi.py @brettcannon +/Tools/wasm/emscripten @freakboy3742 +/Tools/wasm/wasi @brettcannon # SBOM /Misc/externals.spdx.json @sethmlarson @@ -316,3 +325,9 @@ Lib/test/test__colorize.py @hugovk # Fuzzing Modules/_xxtestfuzz/ @ammaraskar + +# t-strings +**/*interpolationobject* @lysnikolaou +**/*templateobject* @lysnikolaou +**/*templatelib* @lysnikolaou +**/*tstring* @lysnikolaou diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index 7b7810cf696..da70710b7ec 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -40,6 +40,7 @@ body: - "3.12" - "3.13" - "3.14" + - "3.15" - "CPython main branch" validations: required: true diff --git a/.github/ISSUE_TEMPLATE/crash.yml b/.github/ISSUE_TEMPLATE/crash.yml index 58da2dfe0c7..470ad581367 100644 --- a/.github/ISSUE_TEMPLATE/crash.yml +++ b/.github/ISSUE_TEMPLATE/crash.yml @@ -33,6 +33,7 @@ body: - "3.12" - "3.13" - "3.14" + - "3.15" - "CPython main branch" validations: required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 4cc2f461dbe..03ea959ca14 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,10 +7,10 @@ # Pull Request title It should be in the following format: ``` -gh-NNNNN: Summary of the changes made +gh-NNNNNN: Summary of the changes made ``` -Where: gh-NNNNN refers to the GitHub issue number. 
+Where: gh-NNNNNN refers to the GitHub issue number. Most PRs will require an issue number. Trivial changes, like fixing a typo, do not need an issue. @@ -20,11 +20,11 @@ # Backport Pull Request title please ensure that the PR title is in the following format: ``` -[X.Y] (GH-NNNN) +[X.Y] <title from the original PR> (GH-NNNNNN) ``` -Where: [X.Y] is the branch name, e.g. [3.6]. +Where: [X.Y] is the branch name, for example: [3.13]. -GH-NNNN refers to the PR number from `main`. +GH-NNNNNN refers to the PR number from `main`. --> diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml index af125266ae7..68aae196357 100644 --- a/.github/actionlint.yaml +++ b/.github/actionlint.yaml @@ -1,5 +1,6 @@ self-hosted-runner: - labels: ["windows-aarch64"] + # Pending https://github.com/rhysd/actionlint/issues/533 + labels: ["windows-11-arm"] config-variables: null diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c8fc5cff28b..54ebc914b46 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,13 @@ permissions: contents: read concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}-reusable + # https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#concurrency + # 'group' must be a key uniquely representing a PR or push event. + # github.workflow is the workflow name + # github.actor is the user invoking the workflow + # github.head_ref is the source branch of the PR or otherwise blank + # github.run_id is a unique number for the current run + group: ${{ github.workflow }}-${{ github.actor }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: @@ -156,28 +162,18 @@ jobs: strategy: fail-fast: false matrix: - os: - - windows-latest arch: - x64 + - Win32 + - arm64 free-threading: - false - true - include: - # Forks don't have access to Windows on Arm runners. 
These jobs are skipped below: - - os: ${{ github.repository_owner == 'python' && 'windows-aarch64' || 'windows-latest' }} - arch: arm64 - free-threading: false - # Forks don't have access to Windows on Arm runners. These jobs are skipped below: - - os: ${{ github.repository_owner == 'python' && 'windows-aarch64' || 'windows-latest' }} - arch: arm64 - free-threading: true - - os: windows-latest - arch: Win32 - free-threading: false + exclude: + # Skip Win32 on free-threaded builds + - { arch: Win32, free-threading: true } uses: ./.github/workflows/reusable-windows.yml with: - os: ${{ matrix.os }} arch: ${{ matrix.arch }} free-threading: ${{ matrix.free-threading }} @@ -189,18 +185,12 @@ jobs: strategy: fail-fast: false matrix: - os: - - windows-latest arch: - x86 - x64 - include: - # Forks don't have access to Windows on Arm runners. These jobs are skipped below: - - os: ${{ github.repository_owner == 'python' && 'windows-aarch64' || 'windows-latest' }} - arch: arm64 + - arm64 uses: ./.github/workflows/reusable-windows-msi.yml with: - os: ${{ matrix.os }} arch: ${{ matrix.arch }} build-macos: @@ -280,7 +270,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-24.04] - openssl_ver: [3.0.15, 3.1.7, 3.2.3, 3.3.2, 3.4.0] + openssl_ver: [3.0.16, 3.1.8, 3.2.4, 3.3.3, 3.4.1] # See Tools/ssl/make_ssl_data.py for notes on adding a new version env: OPENSSL_VER: ${{ matrix.openssl_ver }} @@ -347,7 +337,7 @@ jobs: needs: build-context if: needs.build-context.outputs.run-tests == 'true' env: - OPENSSL_VER: 3.0.15 + OPENSSL_VER: 3.0.16 PYTHONSTRICTEXTENSIONBUILD: 1 steps: - uses: actions/checkout@v4 @@ -438,8 +428,9 @@ jobs: # failing when executed from inside a virtual environment. 
"${VENV_PYTHON}" -m test \ -W \ - -o \ + --slowest \ -j4 \ + --timeout 900 \ -x test_asyncio \ -x test_multiprocessing_fork \ -x test_multiprocessing_forkserver \ @@ -466,7 +457,7 @@ jobs: matrix: os: [ubuntu-24.04] env: - OPENSSL_VER: 3.0.15 + OPENSSL_VER: 3.0.16 PYTHONSTRICTEXTENSIONBUILD: 1 ASAN_OPTIONS: detect_leaks=0:allocator_may_return_null=1:handle_segv=0 steps: diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 4f5ba84d48e..116e0c1e945 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -74,8 +74,7 @@ jobs: runner: windows-latest - target: aarch64-pc-windows-msvc/msvc architecture: ARM64 - # Forks don't have access to Windows on Arm runners. These jobs are skipped below: - runner: ${{ github.repository_owner == 'python' && 'windows-aarch64' || 'windows-latest' }} + runner: windows-11-arm - target: x86_64-apple-darwin/clang architecture: x86_64 runner: macos-13 @@ -96,11 +95,10 @@ jobs: with: python-version: '3.11' + # PCbuild downloads LLVM automatically: - name: Windows - # Forks don't have access to Windows on Arm runners. 
Skip those: - if: runner.os == 'Windows' && (matrix.architecture != 'ARM64' || github.repository_owner == 'python') + if: runner.os == 'Windows' run: | - choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}.1.0 ./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '' }} -p ${{ matrix.architecture }} ./PCbuild/rt.bat ${{ matrix.debug && '-d' || '' }} -p ${{ matrix.architecture }} -q --multiprocess 0 --timeout 4500 --verbose2 --verbose3 @@ -115,7 +113,7 @@ jobs: find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete brew install llvm@${{ matrix.llvm }} export SDKROOT="$(xcrun --show-sdk-path)" - ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '' }} + ./configure --enable-experimental-jit --enable-universalsdk --with-universal-archs=universal2 ${{ matrix.debug && '--with-pydebug' || '' }} make all --jobs 4 ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 @@ -128,29 +126,30 @@ jobs: make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - jit-with-disabled-gil: - name: Free-Threaded (Debug) - needs: interpreter - runs-on: ubuntu-24.04 - timeout-minutes: 90 - strategy: - fail-fast: false - matrix: - llvm: - - 19 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - name: Build with JIT enabled and GIL disabled - run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" - ./configure --enable-experimental-jit --with-pydebug --disable-gil - make all --jobs 4 - - name: Run tests - run: | - ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 + # XXX: GH-133171 + # jit-with-disabled-gil: + # name: Free-Threaded (Debug) + # needs: interpreter + # runs-on: ubuntu-24.04 + # timeout-minutes: 90 + # strategy: + # 
fail-fast: false + # matrix: + # llvm: + # - 19 + # steps: + # - uses: actions/checkout@v4 + # with: + # persist-credentials: false + # - uses: actions/setup-python@v5 + # with: + # python-version: '3.11' + # - name: Build with JIT enabled and GIL disabled + # run: | + # sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + # export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + # ./configure --enable-experimental-jit --with-pydebug --disable-gil + # make all --jobs 4 + # - name: Run tests + # run: | + # ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 212f3e8d70c..95133c1338b 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -8,15 +8,23 @@ on: pull_request: paths: - ".github/workflows/mypy.yml" + - "Lib/_colorize.py" - "Lib/_pyrepl/**" - "Lib/test/libregrtest/**" + - "Lib/tomllib/**" + - "Misc/mypy/**" + - "Tools/build/compute-changes.py" + - "Tools/build/deepfreeze.py" - "Tools/build/generate_sbom.py" + - "Tools/build/generate-build-details.py" + - "Tools/build/verify_ensurepip_wheels.py" + - "Tools/build/update_file.py" + - "Tools/build/umarshal.py" - "Tools/cases_generator/**" - "Tools/clinic/**" - "Tools/jit/**" - "Tools/peg_generator/**" - "Tools/requirements-dev.txt" - - "Tools/wasm/**" workflow_dispatch: permissions: @@ -42,12 +50,12 @@ jobs: target: [ "Lib/_pyrepl", "Lib/test/libregrtest", + "Lib/tomllib", "Tools/build", "Tools/cases_generator", "Tools/clinic", "Tools/jit", "Tools/peg_generator", - "Tools/wasm", ] steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/reusable-context.yml b/.github/workflows/reusable-context.yml index 73e036a146f..d2668ddcac1 100644 --- a/.github/workflows/reusable-context.yml +++ b/.github/workflows/reusable-context.yml @@ -97,6 +97,9 @@ jobs: run: python Tools/build/compute-changes.py env: GITHUB_DEFAULT_BRANCH: ${{ 
github.event.repository.default_branch }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + CCF_TARGET_REF: ${{ github.base_ref || github.event.repository.default_branch }} + CCF_HEAD_REF: ${{ github.event.pull_request.head.sha || github.sha }} - name: Compute hash for config cache key id: config-hash diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 79c28223ac3..657e0a6bf66 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -66,7 +66,7 @@ jobs: run: | set -Eeuo pipefail # Build docs with the nit-picky option; write warnings to file - make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --fail-on-warning --keep-going --warning-file sphinx-warnings.txt" html + make -C Doc/ PYTHON=../python SPHINXOPTS="--quiet --nitpicky --fail-on-warning --warning-file sphinx-warnings.txt" html - name: 'Check warnings' if: github.event_name == 'pull_request' run: | @@ -101,4 +101,4 @@ jobs: run: make -C Doc/ PYTHON=../python venv # Use "xvfb-run" since some doctest tests open GUI windows - name: 'Run documentation doctest' - run: xvfb-run make -C Doc/ PYTHON=../python SPHINXERRORHANDLING="--fail-on-warning --keep-going" doctest + run: xvfb-run make -C Doc/ PYTHON=../python SPHINXERRORHANDLING="--fail-on-warning" doctest diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index 6213c903c5a..a50de344bba 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -3,10 +3,6 @@ name: Reusable Windows MSI on: workflow_call: inputs: - os: - description: OS to run on - required: true - type: string arch: description: CPU architecture required: true @@ -21,7 +17,7 @@ env: jobs: build: name: installer for ${{ inputs.arch }} - runs-on: ${{ inputs.os }} + runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-latest' }} timeout-minutes: 60 env: ARCH: ${{ inputs.arch }} @@ -31,7 +27,5 @@ jobs: 
with: persist-credentials: false - name: Build CPython installer - # Forks don't have access to Windows on Arm runners. Skip those: - if: inputs.arch != 'arm64' || github.repository_owner == 'python' run: ./Tools/msi/build.bat --doc -"${ARCH}" shell: bash diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index deb1f41640e..37c802095b0 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -3,10 +3,6 @@ name: Reusable Windows on: workflow_call: inputs: - os: - description: OS to run on - required: true - type: string arch: description: CPU architecture required: true @@ -25,7 +21,7 @@ env: jobs: build: name: Build and test (${{ inputs.arch }}) - runs-on: ${{ inputs.os }} + runs-on: ${{ inputs.arch == 'arm64' && 'windows-11-arm' || 'windows-latest' }} timeout-minutes: 60 env: ARCH: ${{ inputs.arch }} @@ -37,8 +33,6 @@ jobs: if: inputs.arch != 'Win32' run: echo "::add-matcher::.github/problem-matchers/msvc.json" - name: Build CPython - # Forks don't have access to Windows on Arm runners. Skip those: - if: inputs.arch != 'arm64' || github.repository_owner == 'python' run: >- .\\PCbuild\\build.bat -e -d -v @@ -46,12 +40,8 @@ jobs: ${{ fromJSON(inputs.free-threading) && '--disable-gil' || '' }} shell: bash - name: Display build info - # Forks don't have access to Windows on Arm runners. Skip those: - if: inputs.arch != 'arm64' || github.repository_owner == 'python' run: .\\python.bat -m test.pythoninfo - name: Tests - # Forks don't have access to Windows on Arm runners. 
Skip those: - if: inputs.arch != 'arm64' || github.repository_owner == 'python' run: >- .\\PCbuild\\rt.bat -p "${ARCH}" diff --git a/.github/zizmor.yml b/.github/zizmor.yml index eeda8d9eaaf..9b42b47cc85 100644 --- a/.github/zizmor.yml +++ b/.github/zizmor.yml @@ -4,3 +4,7 @@ rules: dangerous-triggers: ignore: - documentation-links.yml + unpinned-uses: + config: + policies: + "*": ref-pin diff --git a/.gitignore b/.gitignore index 596e0f3af6a..2a6f249275c 100644 --- a/.gitignore +++ b/.gitignore @@ -138,11 +138,12 @@ Tools/unicode/data/ # hendrikmuhs/ccache-action@v1 /.ccache /cross-build/ -/jit_stencils.h +/jit_stencils*.h /platform /profile-clean-stamp /profile-run-stamp /profile-bolt-stamp +/profile-gen-stamp /pybuilddir.txt /pyconfig.h /python-config diff --git a/.mailmap b/.mailmap index 013c839ed6b..8f11bebb18f 100644 --- a/.mailmap +++ b/.mailmap @@ -1,3 +1,4 @@ # This file sets the canonical name for contributors to the repository. # Documentation: https://git-scm.com/docs/gitmailmap +Willow Chargin <wchargin@gmail.com> Amethyst Reese <amethyst@n7.gg> <john@noswap.com> diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fb44c27704d..3632cf39203 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.1 + rev: v0.11.8 hooks: - id: ruff name: Run Ruff (lint) on Doc/ @@ -11,9 +11,9 @@ repos: args: [--exit-non-zero-on-fix] files: ^Lib/test/ - id: ruff - name: Run Ruff (lint) on Tools/build/check_warnings.py + name: Run Ruff (lint) on Tools/build/ args: [--exit-non-zero-on-fix, --config=Tools/build/.ruff.toml] - files: ^Tools/build/check_warnings.py + files: ^Tools/build/ - id: ruff name: Run Ruff (lint) on Argument Clinic args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml] @@ -22,14 +22,14 @@ repos: name: Run Ruff (format) on Doc/ args: [--check] files: ^Doc/ + - id: ruff-format + name: Run Ruff (format) on 
Tools/build/check_warnings.py + args: [--check, --config=Tools/build/.ruff.toml] + files: ^Tools/build/check_warnings.py - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.10.0 + rev: 25.1.0 hooks: - - id: black - name: Run Black on Tools/build/check_warnings.py - files: ^Tools/build/check_warnings.py - args: [--line-length=79] - id: black name: Run Black on Tools/jit/ files: ^Tools/jit/ @@ -47,9 +47,11 @@ repos: exclude: Lib/test/tokenizedata/coding20731.py - id: trailing-whitespace types_or: [c, inc, python, rst] + - id: trailing-whitespace + files: '\.(gram)$' - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.31.0 + rev: 0.33.0 hooks: - id: check-dependabot - id: check-github-workflows @@ -61,7 +63,7 @@ repos: - id: actionlint - repo: https://github.com/woodruffw/zizmor-pre-commit - rev: v1.1.1 + rev: v1.6.0 hooks: - id: zizmor diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 00000000000..1c015fa8841 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,12 @@ +# Default settings for Ruff in CPython + +# PYTHON_FOR_REGEN +target-version = "py310" + +# PEP 8 +line-length = 79 + +# Enable automatic fixes by default. +# To override this, use ``fix = false`` in a subdirectory's config file +# or ``--no-fix`` on the command line. +fix = true diff --git a/Android/README.md b/Android/README.md index 789bcbe5edf..6cabd6ba5d6 100644 --- a/Android/README.md +++ b/Android/README.md @@ -25,11 +25,13 @@ ## Prerequisites `android-sdk/cmdline-tools/latest`. * `export ANDROID_HOME=/path/to/android-sdk` -The `android.py` script also requires the following commands to be on the `PATH`: +The `android.py` script will automatically use the SDK's `sdkmanager` to install +any packages it needs. 
+ +The script also requires the following commands to be on the `PATH`: * `curl` * `java` (or set the `JAVA_HOME` environment variable) -* `tar` ## Building @@ -97,7 +99,7 @@ ## Testing The Python test suite can be run on Linux, macOS, or Windows: * On Linux, the emulator needs access to the KVM virtualization interface, and - a DISPLAY environment variable pointing at an X server. + a DISPLAY environment variable pointing at an X server. Xvfb is acceptable. The test suite can usually be run on a device with 2 GB of RAM, but this is borderline, so you may need to increase it to 4 GB. As of Android diff --git a/Android/android.py b/Android/android.py index 1b20820b784..3f48b42aa17 100755 --- a/Android/android.py +++ b/Android/android.py @@ -138,19 +138,19 @@ def make_build_python(context): run(["make", "-j", str(os.cpu_count())]) -def unpack_deps(host): +def unpack_deps(host, prefix_dir): deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" for name_ver in ["bzip2-1.0.8-2", "libffi-3.4.4-3", "openssl-3.0.15-4", "sqlite-3.49.1-0", "xz-5.4.6-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") - run(["tar", "-xf", filename]) + shutil.unpack_archive(filename, prefix_dir) os.remove(filename) def download(url, target_dir="."): out_path = f"{target_dir}/{basename(url)}" - run(["curl", "-Lf", "-o", out_path, url]) + run(["curl", "-Lf", "--retry", "5", "--retry-all-errors", "-o", out_path, url]) return out_path @@ -162,8 +162,7 @@ def configure_host_python(context): prefix_dir = host_dir / "prefix" if not prefix_dir.exists(): prefix_dir.mkdir() - os.chdir(prefix_dir) - unpack_deps(context.host) + unpack_deps(context.host, prefix_dir) os.chdir(host_dir) command = [ @@ -241,16 +240,15 @@ def setup_sdk(): # the Gradle wrapper is not included in the CPython repository. Instead, we # extract it from the Gradle GitHub repository. 
def setup_testbed(): - # The Gradle version used for the build is specified in - # testbed/gradle/wrapper/gradle-wrapper.properties. This wrapper version - # doesn't need to match, as any version of the wrapper can download any - # version of Gradle. - version = "8.9.0" paths = ["gradlew", "gradlew.bat", "gradle/wrapper/gradle-wrapper.jar"] - if all((TESTBED_DIR / path).exists() for path in paths): return + # The wrapper version isn't important, as any version of the wrapper can + # download any version of Gradle. The Gradle version actually used for the + # build is specified in testbed/gradle/wrapper/gradle-wrapper.properties. + version = "8.9.0" + for path in paths: out_path = TESTBED_DIR / path out_path.parent.mkdir(exist_ok=True) diff --git a/Doc/.ruff.toml b/Doc/.ruff.toml index 111ce03b91d..3e676e13c3f 100644 --- a/Doc/.ruff.toml +++ b/Doc/.ruff.toml @@ -1,7 +1,6 @@ +extend = "../.ruff.toml" # Inherit the project-wide settings + target-version = "py312" # Align with the version in oldest_supported_sphinx -fix = true -output-format = "full" -line-length = 79 extend-exclude = [ "includes/*", # Temporary exclusions: diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 7cbc99ad145..f8d01a3f29b 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -16,7 +16,20 @@ Allocating Objects on the Heap Initialize a newly allocated object *op* with its type and initial reference. Returns the initialized object. Other fields of the object are - not affected. + not initialized. Despite its name, this function is unrelated to the + object's :meth:`~object.__init__` method (:c:member:`~PyTypeObject.tp_init` + slot). Specifically, this function does **not** call the object's + :meth:`!__init__` method. + + In general, consider this function to be a low-level routine. Use + :c:member:`~PyTypeObject.tp_alloc` where possible. + For implementing :c:member:`!tp_alloc` for your type, prefer + :c:func:`PyType_GenericAlloc` or :c:func:`PyObject_New`. 
+ + .. note:: + + This function only initializes the object's memory corresponding to the + initial :c:type:`PyObject` structure. It does not zero the rest. .. c:function:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) @@ -24,38 +37,107 @@ Allocating Objects on the Heap This does everything :c:func:`PyObject_Init` does, and also initializes the length information for a variable-size object. + .. note:: + + This function only initializes some of the object's memory. It does not + zero the rest. + .. c:macro:: PyObject_New(TYPE, typeobj) - Allocate a new Python object using the C structure type *TYPE* - and the Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header are not initialized. - The caller will own the only reference to the object - (i.e. its reference count will be one). - The size of the memory allocation is determined from the - :c:member:`~PyTypeObject.tp_basicsize` field of the type object. + Allocates a new Python object using the C structure type *TYPE* and the + Python type object *typeobj* (``PyTypeObject*``) by calling + :c:func:`PyObject_Malloc` to allocate memory and initializing it like + :c:func:`PyObject_Init`. The caller will own the only reference to the + object (i.e. its reference count will be one). - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_New` instead. + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This macro does not call :c:member:`~PyTypeObject.tp_alloc`, + :c:member:`~PyTypeObject.tp_new` (:meth:`~object.__new__`), or + :c:member:`~PyTypeObject.tp_init` (:meth:`~object.__init__`). 
+ + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_New` instead. + + Memory allocated by this macro must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *foo = PyObject_CallNoArgs((PyObject *)&PyFoo_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:macro:: PyObject_NewVar(TYPE, typeobj, size) - Allocate a new Python object using the C structure type *TYPE* and the - Python type object *typeobj* (``PyTypeObject*``). - Fields not defined by the Python object header - are not initialized. The allocated memory allows for the *TYPE* structure - plus *size* (``Py_ssize_t``) fields of the size - given by the :c:member:`~PyTypeObject.tp_itemsize` field of - *typeobj*. This is useful for implementing objects like tuples, which are - able to determine their size at construction time. Embedding the array of - fields into the same allocation decreases the number of allocations, - improving the memory management efficiency. + Like :c:macro:`PyObject_New` except: - Note that this function is unsuitable if *typeobj* has - :c:macro:`Py_TPFLAGS_HAVE_GC` set. For such objects, - use :c:func:`PyObject_GC_NewVar` instead. + * It allocates enough memory for the *TYPE* structure plus *size* + (``Py_ssize_t``) fields of the size given by the + :c:member:`~PyTypeObject.tp_itemsize` field of *typeobj*. 
+ * The memory is initialized like :c:func:`PyObject_InitVar`. + + This is useful for implementing objects like tuples, which are able to + determine their size at construction time. Embedding the array of fields + into the same allocation decreases the number of allocations, improving the + memory management efficiency. + + Avoid calling this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + This cannot be used for objects with :c:macro:`Py_TPFLAGS_HAVE_GC` set in + :c:member:`~PyTypeObject.tp_flags`; use :c:macro:`PyObject_GC_NewVar` + instead. + + Memory allocated by this macro must be freed with :c:func:`PyObject_Free` + (usually called via the object's :c:member:`~PyTypeObject.tp_free` slot). + + .. note:: + + The returned memory is not guaranteed to have been completely zeroed + before it was initialized. + + .. note:: + + This macro does not construct a fully initialized object of the given + type; it merely allocates memory and prepares it for further + initialization by :c:member:`~PyTypeObject.tp_init`. To construct a + fully initialized object, call *typeobj* instead. For example:: + + PyObject *list_instance = PyObject_CallNoArgs((PyObject *)&PyList_Type); + + .. seealso:: + + * :c:func:`PyObject_Free` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` .. c:function:: void PyObject_Del(void *op) diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index 81b093a3510..3bbc990b632 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -274,6 +274,9 @@ small to receive the value. Convert a Python integer to a C :c:expr:`unsigned long` without overflow checking. + .. versionchanged:: 3.14 + Use :meth:`~object.__index__` if available. 
+ ``L`` (:class:`int`) [long long] Convert a Python integer to a C :c:expr:`long long`. @@ -281,6 +284,9 @@ small to receive the value. Convert a Python integer to a C :c:expr:`unsigned long long` without overflow checking. + .. versionchanged:: 3.14 + Use :meth:`~object.__index__` if available. + ``n`` (:class:`int`) [:c:type:`Py_ssize_t`] Convert a Python integer to a C :c:type:`Py_ssize_t`. @@ -374,10 +380,10 @@ Other objects The *converter* for the ``O&`` format unit in *items* must not store a borrowed buffer or a borrowed reference. - .. versionchanged:: next + .. versionchanged:: 3.14 :class:`str` and :class:`bytearray` objects no longer accepted as a sequence. - .. deprecated:: next + .. deprecated:: 3.14 Non-tuple sequences are deprecated if *items* contains format units which store a borrowed buffer or a borrowed reference. @@ -390,7 +396,7 @@ Other objects If the argument is not ``None``, it is parsed according to the specified format unit. - .. versionadded:: next + .. versionadded:: 3.14 A few other characters have a meaning in a format string. These may not occur inside nested parentheses. They are: @@ -669,6 +675,8 @@ Building values ``L`` (:class:`int`) [long long] Convert a C :c:expr:`long long` to a Python integer object. + .. _capi-py-buildvalue-format-K: + ``K`` (:class:`int`) [unsigned long long] Convert a C :c:expr:`unsigned long long` to a Python integer object. diff --git a/Doc/c-api/complex.rst b/Doc/c-api/complex.rst index d1f5d8eda67..16bd79475dc 100644 --- a/Doc/c-api/complex.rst +++ b/Doc/c-api/complex.rst @@ -44,36 +44,12 @@ pointers. This is consistent throughout the API. representation. -.. c:function:: Py_complex _Py_cr_sum(Py_complex left, double right) - - Return the sum of a complex number and a real number, using the C :c:type:`Py_complex` - representation. - - .. versionadded:: 3.14 - - .. 
c:function:: Py_complex _Py_c_diff(Py_complex left, Py_complex right) Return the difference between two complex numbers, using the C :c:type:`Py_complex` representation. -.. c:function:: Py_complex _Py_cr_diff(Py_complex left, double right) - - Return the difference between a complex number and a real number, using the C - :c:type:`Py_complex` representation. - - .. versionadded:: 3.14 - - -.. c:function:: Py_complex _Py_rc_diff(double left, Py_complex right) - - Return the difference between a real number and a complex number, using the C - :c:type:`Py_complex` representation. - - .. versionadded:: 3.14 - - .. c:function:: Py_complex _Py_c_neg(Py_complex num) Return the negation of the complex number *num*, using the C @@ -86,14 +62,6 @@ pointers. This is consistent throughout the API. representation. -.. c:function:: Py_complex _Py_cr_prod(Py_complex left, double right) - - Return the product of a complex number and a real number, using the C - :c:type:`Py_complex` representation. - - .. versionadded:: 3.14 - - .. c:function:: Py_complex _Py_c_quot(Py_complex dividend, Py_complex divisor) Return the quotient of two complex numbers, using the C :c:type:`Py_complex` @@ -103,28 +71,6 @@ pointers. This is consistent throughout the API. :c:data:`errno` to :c:macro:`!EDOM`. -.. c:function:: Py_complex _Py_cr_quot(Py_complex dividend, double divisor) - - Return the quotient of a complex number and a real number, using the C - :c:type:`Py_complex` representation. - - If *divisor* is zero, this method returns zero and sets - :c:data:`errno` to :c:macro:`!EDOM`. - - .. versionadded:: 3.14 - - -.. c:function:: Py_complex _Py_rc_quot(double dividend, Py_complex divisor) - - Return the quotient of a real number and a complex number, using the C - :c:type:`Py_complex` representation. - - If *divisor* is zero, this method returns zero and sets - :c:data:`errno` to :c:macro:`!EDOM`. - - .. versionadded:: 3.14 - - .. 
c:function:: Py_complex _Py_c_pow(Py_complex num, Py_complex exp) Return the exponentiation of *num* by *exp*, using the C :c:type:`Py_complex` diff --git a/Doc/c-api/float.rst b/Doc/c-api/float.rst index 1da37a5bcae..c5a7653efca 100644 --- a/Doc/c-api/float.rst +++ b/Doc/c-api/float.rst @@ -96,6 +96,9 @@ NaNs (if such things exist on the platform) isn't handled correctly, and attempting to unpack a bytes string containing an IEEE INF or NaN will raise an exception. +Note that the NaN type may not be preserved on IEEE platforms (a signaling NaN becomes a +quiet NaN), for example on x86 systems in 32-bit mode. + On non-IEEE platforms with more precision, or larger dynamic range, than IEEE 754 supports, not all values can be packed; on non-IEEE platforms with less precision, or smaller dynamic range, not all values can be unpacked. What diff --git a/Doc/c-api/gcsupport.rst b/Doc/c-api/gcsupport.rst index 621da3eb069..f6fa52b36c5 100644 --- a/Doc/c-api/gcsupport.rst +++ b/Doc/c-api/gcsupport.rst @@ -57,11 +57,49 @@ rules: Analogous to :c:macro:`PyObject_New` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set. + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_New` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:macro:: PyObject_GC_NewVar(TYPE, typeobj, size) Analogous to :c:macro:`PyObject_NewVar` but for container objects with the :c:macro:`Py_TPFLAGS_HAVE_GC` flag set.
+ Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + When populating a type's :c:member:`~PyTypeObject.tp_alloc` slot, + :c:func:`PyType_GenericAlloc` is preferred over a custom function that + simply calls this macro. + + Memory allocated by this macro must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + + .. seealso:: + + * :c:func:`PyObject_GC_Del` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_alloc` + + .. c:function:: PyObject* PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *type, size_t extra_size) Analogous to :c:macro:`PyObject_GC_New` but allocates *extra_size* @@ -73,6 +111,10 @@ rules: The extra data will be deallocated with the object, but otherwise it is not managed by Python. + Memory allocated by this function must be freed with + :c:func:`PyObject_GC_Del` (usually called via the object's + :c:member:`~PyTypeObject.tp_free` slot). + .. warning:: The function is marked as unstable because the final mechanism for reserving extra data after an instance is not yet decided. @@ -136,6 +178,21 @@ rules: Releases memory allocated to an object using :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_New`, + :c:macro:`PyObject_NewVar`, or related allocation functions; use + :c:func:`PyObject_Free` instead. + + .. seealso:: + + * :c:func:`PyObject_Free` is the non-GC equivalent of this function. + * :c:macro:`PyObject_GC_New` + * :c:macro:`PyObject_GC_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. c:function:: void PyObject_GC_UnTrack(void *op) @@ -180,9 +237,9 @@ provided. 
In order to use this macro, the :c:member:`~PyTypeObject.tp_traverse` must name its arguments exactly *visit* and *arg*: -.. c:function:: void Py_VISIT(PyObject *o) +.. c:macro:: Py_VISIT(o) - If *o* is not ``NULL``, call the *visit* callback, with arguments *o* + If the :c:expr:`PyObject *` *o* is not ``NULL``, call the *visit* callback, with arguments *o* and *arg*. If *visit* returns a non-zero value, then return it. Using this macro, :c:member:`~PyTypeObject.tp_traverse` handlers look like:: @@ -277,7 +334,7 @@ the garbage collector. Type of the visitor function to be passed to :c:func:`PyUnstable_GC_VisitObjects`. *arg* is the same as the *arg* passed to ``PyUnstable_GC_VisitObjects``. - Return ``0`` to continue iteration, return ``1`` to stop iteration. Other return + Return ``1`` to continue iteration, return ``0`` to stop iteration. Other return values are reserved for now so behavior on returning anything else is undefined. .. versionadded:: 3.12 diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst index 1cab3ce3061..8eabc0406b1 100644 --- a/Doc/c-api/import.rst +++ b/Doc/c-api/import.rst @@ -16,19 +16,6 @@ Importing Modules This is a wrapper around :c:func:`PyImport_Import()` which takes a :c:expr:`const char *` as an argument instead of a :c:expr:`PyObject *`. -.. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name) - - This function is a deprecated alias of :c:func:`PyImport_ImportModule`. - - .. versionchanged:: 3.3 - This function used to fail immediately when the import lock was held - by another thread. In Python 3.3 though, the locking scheme switched - to per-module locks for most purposes, so this function's special - behaviour isn't needed anymore. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyImport_ImportModule` instead. - .. 
c:function:: PyObject* PyImport_ImportModuleEx(const char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 3597f35e0a2..9c866438b48 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -77,10 +77,7 @@ The following functions can be safely called before Python is initialized: Despite their apparent similarity to some of the functions listed above, the following functions **should not be called** before the interpreter has - been initialized: :c:func:`Py_EncodeLocale`, :c:func:`Py_GetPath`, - :c:func:`Py_GetPrefix`, :c:func:`Py_GetExecPrefix`, - :c:func:`Py_GetProgramFullPath`, :c:func:`Py_GetPythonHome`, - :c:func:`Py_GetProgramName`, :c:func:`PyEval_InitThreads`, and + been initialized: :c:func:`Py_EncodeLocale`, :c:func:`PyEval_InitThreads`, and :c:func:`Py_RunMain`. @@ -145,9 +142,6 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. :c:member:`PyConfig.pathconfig_warnings` should be used instead, see :ref:`Python Initialization Configuration <init-config>`. - Suppress error messages when calculating the module search path in - :c:func:`Py_GetPath`. - Private flag used by ``_freeze_module`` and ``frozenmain`` programs. .. deprecated-removed:: 3.12 3.15 @@ -203,7 +197,7 @@ to 1 and ``-bb`` sets :c:data:`Py_BytesWarningFlag` to 2. Set by the :option:`-i` option. - .. deprecated:: 3.12 + .. deprecated-removed:: 3.12 3.15 .. c:var:: int Py_IsolatedFlag @@ -586,7 +580,6 @@ Process-wide parameters .. index:: single: Py_Initialize() single: main() - single: Py_GetPath() This API is kept for backward compatibility: setting :c:member:`PyConfig.program_name` should be used instead, see :ref:`Python @@ -596,7 +589,7 @@ Process-wide parameters the first time, if it is called at all. It tells the interpreter the value of the ``argv[0]`` argument to the :c:func:`main` function of the program (converted to wide characters). 
- This is used by :c:func:`Py_GetPath` and some other functions below to find + This is used by some other functions below to find the Python run-time libraries relative to the interpreter executable. The default value is ``'python'``. The argument should point to a zero-terminated wide character string in static storage whose contents will not @@ -609,146 +602,6 @@ Process-wide parameters .. deprecated-removed:: 3.11 3.15 -.. c:function:: wchar_t* Py_GetProgramName() - - Return the program name set with :c:member:`PyConfig.program_name`, or the default. - The returned string points into static storage; the caller should not modify its - value. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("executable") <PyConfig_Get>` - (:data:`sys.executable`) instead. - - -.. c:function:: wchar_t* Py_GetPrefix() - - Return the *prefix* for installed platform-independent files. This is derived - through a number of complicated rules from the program name set with - :c:member:`PyConfig.program_name` and some environment variables; for example, if the - program name is ``'/usr/local/bin/python'``, the prefix is ``'/usr/local'``. The - returned string points into static storage; the caller should not modify its - value. This corresponds to the :makevar:`prefix` variable in the top-level - :file:`Makefile` and the :option:`--prefix` argument to the :program:`configure` - script at build time. The value is available to Python code as ``sys.base_prefix``. - It is only useful on Unix. See also the next function. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. 
deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("base_prefix") <PyConfig_Get>` - (:data:`sys.base_prefix`) instead. Use :c:func:`PyConfig_Get("prefix") - <PyConfig_Get>` (:data:`sys.prefix`) if :ref:`virtual environments - <venv-def>` need to be handled. - - -.. c:function:: wchar_t* Py_GetExecPrefix() - - Return the *exec-prefix* for installed platform-*dependent* files. This is - derived through a number of complicated rules from the program name set with - :c:member:`PyConfig.program_name` and some environment variables; for example, if the - program name is ``'/usr/local/bin/python'``, the exec-prefix is - ``'/usr/local'``. The returned string points into static storage; the caller - should not modify its value. This corresponds to the :makevar:`exec_prefix` - variable in the top-level :file:`Makefile` and the ``--exec-prefix`` - argument to the :program:`configure` script at build time. The value is - available to Python code as ``sys.base_exec_prefix``. It is only useful on - Unix. - - Background: The exec-prefix differs from the prefix when platform dependent - files (such as executables and shared libraries) are installed in a different - directory tree. In a typical installation, platform dependent files may be - installed in the :file:`/usr/local/plat` subtree while platform independent may - be installed in :file:`/usr/local`. - - Generally speaking, a platform is a combination of hardware and software - families, e.g. Sparc machines running the Solaris 2.x operating system are - considered the same platform, but Intel machines running Solaris 2.x are another - platform, and Intel machines running Linux are yet another platform. Different - major revisions of the same operating system generally also form different - platforms. Non-Unix operating systems are a different story; the installation - strategies on those systems are so different that the prefix and exec-prefix are - meaningless, and set to the empty string. 
Note that compiled Python bytecode - files are platform independent (but not independent from the Python version by - which they were compiled!). - - System administrators will know how to configure the :program:`mount` or - :program:`automount` programs to share :file:`/usr/local` between platforms - while having :file:`/usr/local/plat` be a different filesystem for each - platform. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("base_exec_prefix") <PyConfig_Get>` - (:data:`sys.base_exec_prefix`) instead. Use - :c:func:`PyConfig_Get("exec_prefix") <PyConfig_Get>` - (:data:`sys.exec_prefix`) if :ref:`virtual environments <venv-def>` need - to be handled. - -.. c:function:: wchar_t* Py_GetProgramFullPath() - - .. index:: - single: executable (in module sys) - - Return the full program name of the Python executable; this is computed as a - side-effect of deriving the default module search path from the program name - (set by :c:member:`PyConfig.program_name`). The returned string points into - static storage; the caller should not modify its value. The value is available - to Python code as ``sys.executable``. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("executable") <PyConfig_Get>` - (:data:`sys.executable`) instead. - - -.. c:function:: wchar_t* Py_GetPath() - - .. index:: - triple: module; search; path - single: path (in module sys) - - Return the default module search path; this is computed from the program name - (set by :c:member:`PyConfig.program_name`) and some environment variables. 
- The returned string consists of a series of directory names separated by a - platform dependent delimiter character. The delimiter character is ``':'`` - on Unix and macOS, ``';'`` on Windows. The returned string points into - static storage; the caller should not modify its value. The list - :data:`sys.path` is initialized with this value on interpreter startup; it - can be (and usually is) modified later to change the search path for loading - modules. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. XXX should give the exact rules - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("module_search_paths") <PyConfig_Get>` - (:data:`sys.path`) instead. - .. c:function:: const char* Py_GetVersion() Return the version of this Python interpreter. This is a string that looks @@ -919,23 +772,6 @@ Process-wide parameters .. deprecated-removed:: 3.11 3.15 -.. c:function:: wchar_t* Py_GetPythonHome() - - Return the default "home", that is, the value set by - :c:member:`PyConfig.home`, or the value of the :envvar:`PYTHONHOME` - environment variable if it is set. - - This function should not be called before :c:func:`Py_Initialize`, otherwise - it returns ``NULL``. - - .. versionchanged:: 3.10 - It now returns ``NULL`` if called before :c:func:`Py_Initialize`. - - .. deprecated-removed:: 3.13 3.15 - Use :c:func:`PyConfig_Get("home") <PyConfig_Get>` or the - :envvar:`PYTHONHOME` environment variable instead. - - .. _threads: Thread State and the Global Interpreter Lock @@ -1083,8 +919,36 @@ Note that the ``PyGILState_*`` functions assume there is only one global interpreter (created automatically by :c:func:`Py_Initialize`). Python supports the creation of additional interpreters (using :c:func:`Py_NewInterpreter`), but mixing multiple interpreters and the -``PyGILState_*`` API is unsupported. 
+``PyGILState_*`` API is unsupported. This is because :c:func:`PyGILState_Ensure` +and similar functions default to :term:`attaching <attached thread state>` a +:term:`thread state` for the main interpreter, meaning that the thread can't safely +interact with the calling subinterpreter. +Supporting subinterpreters in non-Python threads +------------------------------------------------ + +If you would like to support subinterpreters with non-Python created threads, you +must use the ``PyThreadState_*`` API instead of the traditional ``PyGILState_*`` +API. + +In particular, you must store the interpreter state from the calling +function and pass it to :c:func:`PyThreadState_New`, which will ensure that +the :term:`thread state` is targeting the correct interpreter:: + + /* The return value of PyInterpreterState_Get() from the + function that created this thread. */ + PyInterpreterState *interp = ThreadData->interp; + PyThreadState *tstate = PyThreadState_New(interp); + PyThreadState_Swap(tstate); + + /* GIL of the subinterpreter is now held. + Perform Python actions here. */ + result = CallSomeFunction(); + /* evaluate result or handle exception */ + + /* Destroy the thread state. No Python API allowed beyond this point. */ + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); .. _fork-and-threads: @@ -1131,7 +995,7 @@ Cautions regarding runtime finalization In the late stage of :term:`interpreter shutdown`, after attempting to wait for non-daemon threads to exit (though this can be interrupted by :class:`KeyboardInterrupt`) and running the :mod:`atexit` functions, the runtime -is marked as *finalizing*: :c:func:`_Py_IsFinalizing` and +is marked as *finalizing*: :c:func:`Py_IsFinalizing` and :func:`sys.is_finalizing` return true. At this point, only the *finalization thread* that initiated finalization (typically the main thread) is allowed to acquire the :term:`GIL`. @@ -1261,6 +1125,10 @@ code, or when embedding the Python interpreter: .. 
seealso: :c:func:`PyEval_ReleaseThread` + .. note:: + Similar to :c:func:`PyGILState_Ensure`, this function will hang the + thread if the runtime is finalizing. + The following functions use thread-local storage, and are not compatible with sub-interpreters: @@ -1287,10 +1155,10 @@ with sub-interpreters: When the function returns, there will be an :term:`attached thread state` and the thread will be able to call arbitrary Python code. Failure is a fatal error. - .. note:: - Calling this function from a thread when the runtime is finalizing will - hang the thread until the program exits, even if the thread was not - created by Python. Refer to + .. warning:: + Calling this function when the runtime is finalizing is unsafe. Doing + so will either hang the thread until the program ends, or fully crash + the interpreter in rare cases. Refer to :ref:`cautions-regarding-runtime-finalization` for more details. .. versionchanged:: 3.14 @@ -1307,7 +1175,6 @@ with sub-interpreters: Every call to :c:func:`PyGILState_Ensure` must be matched by a call to :c:func:`PyGILState_Release` on the same thread. - .. c:function:: PyThreadState* PyGILState_GetThisThreadState() Get the :term:`attached thread state` for this thread. May return ``NULL`` if no @@ -1315,20 +1182,30 @@ with sub-interpreters: always has such a thread-state, even if no auto-thread-state call has been made on the main thread. This is mainly a helper/diagnostic function. - .. seealso: :c:func:`PyThreadState_Get`` + .. note:: + This function does not account for :term:`thread states <thread state>` created + by something other than :c:func:`PyGILState_Ensure` (such as :c:func:`PyThreadState_New`). + Prefer :c:func:`PyThreadState_Get` or :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. seealso:: :c:func:`PyThreadState_Get` .. c:function:: int PyGILState_Check() Return ``1`` if the current thread is holding the :term:`GIL` and ``0`` otherwise. This function can be called from any thread at any time.
- Only if it has had its Python thread state initialized and currently is - holding the :term:`GIL` will it return ``1``. + Only if it has had its :term:`thread state <attached thread state>` initialized + via :c:func:`PyGILState_Ensure` will it return ``1``. This is mainly a helper/diagnostic function. It can be useful for example in callback contexts or memory allocation functions when knowing that the :term:`GIL` is locked can allow the caller to perform sensitive actions or otherwise behave differently. + .. note:: + If the current Python process has ever created a subinterpreter, this + function will *always* return ``1``. Prefer :c:func:`PyThreadState_GetUnchecked` + for most cases. + .. versionadded:: 3.4 @@ -1517,16 +1394,6 @@ All of the following functions must be called after :c:func:`Py_Initialize`. .. versionadded:: 3.8 -.. c:function:: PyObject* PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *interp) - - Return a :term:`strong reference` to the ``__main__`` :ref:`module object <moduleobjects>` - for the given interpreter. - - The caller must have an :term:`attached thread state`. - - .. versionadded:: 3.13 - - .. c:type:: PyObject* (*_PyFrameEvalFunction)(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) Type of a frame evaluation function. diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 74f34f81d46..e1931655618 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -320,7 +320,7 @@ Configuration Options * - ``"cpu_count"`` - :c:member:`cpu_count <PyConfig.cpu_count>` - ``int`` - - Read-only + - Public * - ``"dev_mode"`` - :c:member:`dev_mode <PyConfig.dev_mode>` - ``bool`` @@ -363,7 +363,7 @@ Configuration Options - Read-only * - ``"import_time"`` - :c:member:`import_time <PyConfig.import_time>` - - ``bool`` + - ``int`` - Read-only * - ``"inspect"`` - :c:member:`inspect <PyConfig.inspect>` @@ -619,6 +619,8 @@ Some options are read from the :mod:`sys` attributes. 
For example, the option The caller must have an :term:`attached thread state`. The function cannot be called before Python initialization nor after Python finalization. + .. audit-event:: cpython.PyConfig_Set name,value c.PyConfig_Set + .. versionadded:: 3.14 @@ -1475,13 +1477,19 @@ PyConfig .. c:member:: int import_time - If non-zero, profile import time. + If ``1``, profile import time. + If ``2``, include additional output that indicates + when an imported module has already been loaded. - Set the ``1`` by the :option:`-X importtime <-X>` option and the + Set by the :option:`-X importtime <-X>` option and the :envvar:`PYTHONPROFILEIMPORTTIME` environment variable. Default: ``0``. + .. versionchanged:: 3.14 + + Added support for ``import_time = 2`` + .. c:member:: int inspect Enter interactive mode after executing a script or a command. diff --git a/Doc/c-api/intro.rst b/Doc/c-api/intro.rst index 76d7d579342..0c20ad17194 100644 --- a/Doc/c-api/intro.rst +++ b/Doc/c-api/intro.rst @@ -148,7 +148,7 @@ complete listing. .. c:macro:: Py_ALWAYS_INLINE Ask the compiler to always inline a static inline function. The compiler can - ignore it and decides to not inline the function. + ignore it and decide to not inline the function. It can be used to inline performance critical static inline functions when building Python in debug mode with function inlining disabled. For example, @@ -779,20 +779,11 @@ found along :envvar:`PATH`.) The user can override this behavior by setting the environment variable :envvar:`PYTHONHOME`, or insert additional directories in front of the standard path by setting :envvar:`PYTHONPATH`. -.. index:: - single: Py_GetPath (C function) - single: Py_GetPrefix (C function) - single: Py_GetExecPrefix (C function) - single: Py_GetProgramFullPath (C function) - The embedding application can steer the search by setting :c:member:`PyConfig.program_name` *before* calling :c:func:`Py_InitializeFromConfig`. 
Note that :envvar:`PYTHONHOME` still overrides this and :envvar:`PYTHONPATH` is still -inserted in front of the standard path. An application that requires total -control has to provide its own implementation of :c:func:`Py_GetPath`, -:c:func:`Py_GetPrefix`, :c:func:`Py_GetExecPrefix`, and -:c:func:`Py_GetProgramFullPath` (all defined in :file:`Modules/getpath.c`). +inserted in front of the standard path. .. index:: single: Py_IsInitialized (C function) @@ -826,14 +817,17 @@ frequently used builds will be described in the remainder of this section. Compiling the interpreter with the :c:macro:`!Py_DEBUG` macro defined produces what is generally meant by :ref:`a debug build of Python <debug-build>`. -:c:macro:`!Py_DEBUG` is enabled in the Unix build by adding -:option:`--with-pydebug` to the :file:`./configure` command. -It is also implied by the presence of the -not-Python-specific :c:macro:`!_DEBUG` macro. When :c:macro:`!Py_DEBUG` is enabled -in the Unix build, compiler optimization is disabled. + +On Unix, :c:macro:`!Py_DEBUG` can be enabled by adding :option:`--with-pydebug` +to the :file:`./configure` command. This will also disable compiler optimization. + +On Windows, selecting a debug build (e.g., by passing the :option:`-d` option to +:file:`PCbuild/build.bat`) automatically enables :c:macro:`!Py_DEBUG`. +Additionally, the presence of the not-Python-specific :c:macro:`!_DEBUG` macro, +when defined by the compiler, will also implicitly enable :c:macro:`!Py_DEBUG`. In addition to the reference count debugging described below, extra checks are -performed, see :ref:`Python Debug Build <debug-build>`. +performed. See :ref:`Python Debug Build <debug-build>` for more details. Defining :c:macro:`Py_TRACE_REFS` enables reference tracing (see the :option:`configure --with-trace-refs option <--with-trace-refs>`). 
diff --git a/Doc/c-api/lifecycle.dot b/Doc/c-api/lifecycle.dot new file mode 100644 index 00000000000..dca9f87e9e0 --- /dev/null +++ b/Doc/c-api/lifecycle.dot @@ -0,0 +1,156 @@ +digraph "Life Events" { + graph [ + fontnames="svg" + fontsize=12.0 + id="life_events_graph" + layout="dot" + margin="0,0" + ranksep=0.25 + stylesheet="lifecycle.dot.css" + ] + node [ + fontname="Courier" + fontsize=12.0 + ] + edge [ + fontname="Times-Italic" + fontsize=12.0 + ] + + "start" [fontname="Times-Italic" shape=plain label=< start > style=invis] + { + rank="same" + "tp_new" [href="typeobj.html#c.PyTypeObject.tp_new" target="_top"] + "tp_alloc" [href="typeobj.html#c.PyTypeObject.tp_alloc" target="_top"] + } + "tp_init" [href="typeobj.html#c.PyTypeObject.tp_init" target="_top"] + "reachable" [fontname="Times-Italic" shape=box] + "tp_traverse" [ + href="typeobj.html#c.PyTypeObject.tp_traverse" + ordering="in" + target="_top" + ] + "finalized?" [ + fontname="Times-Italic" + label=<marked as<br/>finalized?> + ordering="in" + shape=diamond + tooltip="marked as finalized?" 
+ ] + "tp_finalize" [ + href="typeobj.html#c.PyTypeObject.tp_finalize" + ordering="in" + target="_top" + ] + "tp_clear" [href="typeobj.html#c.PyTypeObject.tp_clear" target="_top"] + "uncollectable" [ + fontname="Times-Italic" + label=<uncollectable<br/>(leaked)> + shape=box + tooltip="uncollectable (leaked)" + ] + "tp_dealloc" [ + href="typeobj.html#c.PyTypeObject.tp_dealloc" + ordering="in" + target="_top" + ] + "tp_free" [href="typeobj.html#c.PyTypeObject.tp_free" target="_top"] + + "start" -> "tp_new" [ + label=< type call > + ] + "tp_new" -> "tp_alloc" [ + label=< direct call > arrowhead=empty + labeltooltip="tp_new to tp_alloc: direct call" + tooltip="tp_new to tp_alloc: direct call" + ] + "tp_new" -> "tp_init" [tooltip="tp_new to tp_init"] + "tp_init" -> "reachable" [tooltip="tp_init to reachable"] + "reachable" -> "tp_traverse" [ + dir="back" + label=< not in a <br/> cyclic <br/> isolate > + labeltooltip="tp_traverse to reachable: not in a cyclic isolate" + tooltip="tp_traverse to reachable: not in a cyclic isolate" + ] + "reachable" -> "tp_traverse" [ + label=< periodic <br/> cyclic isolate <br/> detection > + labeltooltip="reachable to tp_traverse: periodic cyclic isolate detection" + tooltip="reachable to tp_traverse: periodic cyclic isolate detection" + ] + "reachable" -> "tp_init" [tooltip="reachable to tp_init"] + "reachable" -> "tp_finalize" [ + dir="back" + label=< resurrected <br/> (maybe remove <br/> finalized mark) > + labeltooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + tooltip="tp_finalize to reachable: resurrected (maybe remove finalized mark)" + ] + "tp_traverse" -> "finalized?" [ + label=< cyclic <br/> isolate > + labeltooltip="tp_traverse to finalized?: cyclic isolate" + tooltip="tp_traverse to finalized?: cyclic isolate" + ] + "reachable" -> "finalized?" [ + label=< no refs > + labeltooltip="reachable to finalized?: no refs" + tooltip="reachable to finalized?: no refs" + ] + "finalized?" 
-> "tp_finalize" [ + label=< no (mark <br/> as finalized) > + labeltooltip="finalized? to tp_finalize: no (mark as finalized)" + tooltip="finalized? to tp_finalize: no (mark as finalized)" + ] + "finalized?" -> "tp_clear" [ + label=< yes > + labeltooltip="finalized? to tp_clear: yes" + tooltip="finalized? to tp_clear: yes" + ] + "tp_finalize" -> "tp_clear" [ + label=< no refs or <br/> cyclic isolate > + labeltooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + tooltip="tp_finalize to tp_clear: no refs or cyclic isolate" + ] + "tp_finalize" -> "tp_dealloc" [ + arrowtail=empty + dir="back" + href="lifecycle.html#c.PyObject_CallFinalizerFromDealloc" + style=dashed + label=< recommended<br/> call (see<br/> explanation)> + labeltooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + target="_top" + tooltip="tp_dealloc to tp_finalize: recommended call (see explanation)" + ] + "tp_finalize" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_finalize to tp_dealloc: no refs" + tooltip="tp_finalize to tp_dealloc: no refs" + ] + "tp_clear" -> "tp_dealloc" [ + label=< no refs > + labeltooltip="tp_clear to tp_dealloc: no refs" + tooltip="tp_clear to tp_dealloc: no refs" + ] + "tp_clear" -> "uncollectable" [ + label=< cyclic <br/> isolate > + labeltooltip="tp_clear to uncollectable: cyclic isolate" + tooltip="tp_clear to uncollectable: cyclic isolate" + ] + "uncollectable" -> "tp_dealloc" [ + style=invis + tooltip="uncollectable to tp_dealloc" + ] + "reachable" -> "uncollectable" [ + label=< cyclic <br/> isolate <br/> (no GC <br/> support) > + labeltooltip="reachable to uncollectable: cyclic isolate (no GC support)" + tooltip="reachable to uncollectable: cyclic isolate (no GC support)" + ] + "reachable" -> "tp_dealloc" [ + label=< no refs> + labeltooltip="reachable to tp_dealloc: no refs" + ] + "tp_dealloc" -> "tp_free" [ + arrowhead=empty + label=< direct call > + labeltooltip="tp_dealloc to tp_free: direct call" + tooltip="tp_dealloc to 
tp_free: direct call" + ] +} diff --git a/Doc/c-api/lifecycle.dot.css b/Doc/c-api/lifecycle.dot.css new file mode 100644 index 00000000000..3abf95b74da --- /dev/null +++ b/Doc/c-api/lifecycle.dot.css @@ -0,0 +1,21 @@ +#life_events_graph { + --svg-fgcolor: currentcolor; + --svg-bgcolor: transparent; +} +#life_events_graph a { + color: inherit; +} +#life_events_graph [stroke="black"] { + stroke: var(--svg-fgcolor); +} +#life_events_graph text, +#life_events_graph [fill="black"] { + fill: var(--svg-fgcolor); +} +#life_events_graph [fill="white"] { + fill: var(--svg-bgcolor); +} +#life_events_graph [fill="none"] { + /* On links, setting fill will make the entire shape clickable */ + fill: var(--svg-bgcolor); +} diff --git a/Doc/c-api/lifecycle.dot.pdf b/Doc/c-api/lifecycle.dot.pdf new file mode 100644 index 00000000000..ed5b5039c83 Binary files /dev/null and b/Doc/c-api/lifecycle.dot.pdf differ diff --git a/Doc/c-api/lifecycle.dot.svg b/Doc/c-api/lifecycle.dot.svg new file mode 100644 index 00000000000..7ace27dfcba --- /dev/null +++ b/Doc/c-api/lifecycle.dot.svg @@ -0,0 +1,374 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<?xml-stylesheet href="lifecycle.dot.css" type="text/css"?> +<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" + "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> +<!-- Generated by graphviz version 12.2.0 (0) + --> +<!-- Title: Life Events Pages: 1 --> +<svg width="465pt" height="845pt" + viewBox="0.00 0.00 465.30 845.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"> +<g id="life_events_graph" class="graph" transform="scale(1 1) rotate(0) translate(4 841)"> +<title>Life Events + + + + +tp_new + + +tp_new + + + + + +start->tp_new + + + + + + +    type call   + + + + + +tp_alloc + + +tp_alloc + + + + + +tp_new->tp_alloc + + + + + + +  direct call   + + + + + +tp_init + + +tp_init + + + + + +tp_new->tp_init + + + + + + + + +reachable + +reachable + + + +tp_init->reachable + + + + + + + + +reachable->tp_init 
+ + + + + + + + +tp_traverse + + +tp_traverse + + + + + +reachable->tp_traverse + + + + + + +  not in a   +  cyclic   +  isolate   + + + + + +reachable->tp_traverse + + + + + + +  periodic   +  cyclic isolate    +  detection   + + + + + +finalized? + + +marked as +finalized? + + + + + +reachable->finalized? + + + + + + +  no refs   + + + + + +tp_finalize + + +tp_finalize + + + + + +reachable->tp_finalize + + + + + + +  resurrected   +  (maybe remove   +  finalized mark)   + + + + + +uncollectable + + +uncollectable +(leaked) + + + + + +reachable->uncollectable + + + + + + +  cyclic   +  isolate   +  (no GC   +  support)   + + + + + +tp_dealloc + + +tp_dealloc + + + + + +reachable->tp_dealloc + + + +  no refs + + + + + +tp_traverse->finalized? + + + + + + +  cyclic   +  isolate   + + + + + +finalized?->tp_finalize + + + + + + +  no (mark   +  as finalized)   + + + + + +tp_clear + + +tp_clear + + + + + +finalized?->tp_clear + + + + + + +  yes   + + + + + +tp_finalize->tp_clear + + + + + + +  no refs or    +  cyclic isolate   + + + + + +tp_finalize->tp_dealloc + + + + + + +  recommended +  call (see +  explanation) + + + + + +tp_finalize->tp_dealloc + + + + + + +   no refs   + + + + + +tp_clear->uncollectable + + + + + + +  cyclic   +  isolate   + + + + + +tp_clear->tp_dealloc + + + + + + +  no refs   + + + + + + +tp_free + + +tp_free + + + + + +tp_dealloc->tp_free + + + + + + +    direct call   + + + + + diff --git a/Doc/c-api/lifecycle.rst b/Doc/c-api/lifecycle.rst new file mode 100644 index 00000000000..0e2ffc096ca --- /dev/null +++ b/Doc/c-api/lifecycle.rst @@ -0,0 +1,273 @@ +.. highlight:: c + +.. _life-cycle: + +Object Life Cycle +================= + +This section explains how a type's slots relate to each other throughout the +life of an object. It is not intended to be a complete canonical reference for +the slots; instead, refer to the slot-specific documentation in +:ref:`type-structs` for details about a particular slot. 
+ + +Life Events +----------- + +The figure below illustrates the order of events that can occur throughout an +object's life. An arrow from *A* to *B* indicates that event *B* can occur +after event *A* has occurred, with the arrow's label indicating the condition +that must be true for *B* to occur after *A*. + +.. only:: html and not epub + + .. raw:: html + + + + .. raw:: html + :file: lifecycle.dot.svg + + .. raw:: html + + + +.. only:: epub or not (html or latex) + + .. image:: lifecycle.dot.svg + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail + below. + +.. only:: latex + + .. image:: lifecycle.dot.pdf + :align: center + :class: invert-in-dark-mode + :alt: Diagram showing events in an object's life. Explained in detail + below. + +.. container:: + :name: life-events-graph-description + + Explanation: + + * When a new object is constructed by calling its type: + + #. :c:member:`~PyTypeObject.tp_new` is called to create a new object. + #. :c:member:`~PyTypeObject.tp_alloc` is directly called by + :c:member:`~PyTypeObject.tp_new` to allocate the memory for the new + object. + #. :c:member:`~PyTypeObject.tp_init` initializes the newly created object. + :c:member:`!tp_init` can be called again to re-initialize an object, if + desired. The :c:member:`!tp_init` call can also be skipped entirely, + for example by Python code calling :py:meth:`~object.__new__`. + + * After :c:member:`!tp_init` completes, the object is ready to use. + * Some time after the last reference to an object is removed: + + #. If an object is not marked as *finalized*, it might be finalized by + marking it as *finalized* and calling its + :c:member:`~PyTypeObject.tp_finalize` function. Python does + *not* finalize an object when the last reference to it is deleted; use + :c:func:`PyObject_CallFinalizerFromDealloc` to ensure that + :c:member:`~PyTypeObject.tp_finalize` is always called. + #. 
If the object is marked as finalized, + :c:member:`~PyTypeObject.tp_clear` might be called by the garbage collector + to clear references held by the object. It is *not* called when the + object's reference count reaches zero. + #. :c:member:`~PyTypeObject.tp_dealloc` is called to destroy the object. + To avoid code duplication, :c:member:`~PyTypeObject.tp_dealloc` typically + calls into :c:member:`~PyTypeObject.tp_clear` to free up the object's + references. + #. When :c:member:`~PyTypeObject.tp_dealloc` finishes object destruction, + it directly calls :c:member:`~PyTypeObject.tp_free` (usually set to + :c:func:`PyObject_Free` or :c:func:`PyObject_GC_Del` automatically as + appropriate for the type) to deallocate the memory. + + * The :c:member:`~PyTypeObject.tp_finalize` function is permitted to add a + reference to the object if desired. If it does, the object is + *resurrected*, preventing its pending destruction. (Only + :c:member:`!tp_finalize` is allowed to resurrect an object; + :c:member:`~PyTypeObject.tp_clear` and + :c:member:`~PyTypeObject.tp_dealloc` cannot without calling into + :c:member:`!tp_finalize`.) Resurrecting an object may + or may not cause the object's *finalized* mark to be removed. Currently, + Python does not remove the *finalized* mark from a resurrected object if + it supports garbage collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set) but does remove the mark if the object does not support + garbage collection; either or both of these behaviors may change in the + future. + * :c:member:`~PyTypeObject.tp_dealloc` can optionally call + :c:member:`~PyTypeObject.tp_finalize` via + :c:func:`PyObject_CallFinalizerFromDealloc` if it wishes to reuse that + code to help with object destruction. This is recommended because it + guarantees that :c:member:`!tp_finalize` is always called before + destruction. See the :c:member:`~PyTypeObject.tp_dealloc` documentation + for example code. 
+ * If the object is a member of a :term:`cyclic isolate` and either + :c:member:`~PyTypeObject.tp_clear` fails to break the reference cycle or + the cyclic isolate is not detected (perhaps :func:`gc.disable` was called, + or the :c:macro:`Py_TPFLAGS_HAVE_GC` flag was erroneously omitted in one + of the involved types), the objects remain indefinitely uncollectable + (they "leak"). See :data:`gc.garbage`. + + If the object is marked as supporting garbage collection (the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`), the following events are also possible: + + * The garbage collector occasionally calls + :c:member:`~PyTypeObject.tp_traverse` to identify :term:`cyclic isolates + <cyclic isolate>`. + * When the garbage collector discovers a :term:`cyclic isolate`, it + finalizes one of the objects in the group by marking it as *finalized* and + calling its :c:member:`~PyTypeObject.tp_finalize` function, if it has one. + This repeats until the cyclic isolate doesn't exist or all of the objects + have been finalized. + * :c:member:`~PyTypeObject.tp_finalize` is permitted to resurrect the object + by adding a reference from outside the :term:`cyclic isolate`. The new + reference causes the group of objects to no longer form a cyclic isolate + (the reference cycle may still exist, but if it does the objects are no + longer isolated). + * When the garbage collector discovers a :term:`cyclic isolate` and all of + the objects in the group have already been marked as *finalized*, the + garbage collector clears one or more of the uncleared objects in the group + (possibly concurrently) by calling each's + :c:member:`~PyTypeObject.tp_clear` function. This repeats as long as the + cyclic isolate still exists and not all of the objects have been cleared.
+ + +Cyclic Isolate Destruction +-------------------------- + +Listed below are the stages of life of a hypothetical :term:`cyclic isolate` +that continues to exist after each member object is finalized or cleared. It +is a memory leak if a cyclic isolate progresses through all of these stages; it should +vanish once all objects are cleared, if not sooner. A cyclic isolate can +vanish either because the reference cycle is broken or because the objects are +no longer isolated due to finalizer resurrection (see +:c:member:`~PyTypeObject.tp_finalize`). + +0. **Reachable** (not yet a cyclic isolate): All objects are in their normal, + reachable state. A reference cycle could exist, but an external reference + means the objects are not yet isolated. +#. **Unreachable but consistent:** The final reference from outside the cyclic + group of objects has been removed, causing the objects to become isolated + (thus a cyclic isolate is born). None of the group's objects have been + finalized or cleared yet. The cyclic isolate remains at this stage until + some future run of the garbage collector (not necessarily the next run + because the next run might not scan every object). +#. **Mix of finalized and not finalized:** Objects in a cyclic isolate are + finalized one at a time, which means that there is a period of time when the + cyclic isolate is composed of a mix of finalized and non-finalized objects. + Finalization order is unspecified, so it can appear random. A finalized + object must behave in a sane manner when non-finalized objects interact with + it, and a non-finalized object must be able to tolerate the finalization of + an arbitrary subset of its referents. +#. **All finalized:** All objects in a cyclic isolate are finalized before any + of them are cleared. +#. **Mix of finalized and cleared:** The objects can be cleared serially or + concurrently (but with the :term:`GIL` held); either way, some will finish + before others. 
A finalized object must be able to tolerate the clearing of + a subset of its referents. :pep:`442` calls this stage "cyclic trash". +#. **Leaked:** If a cyclic isolate still exists after all objects in the group + have been finalized and cleared, then the objects remain indefinitely + uncollectable (see :data:`gc.garbage`). It is a bug if a cyclic isolate + reaches this stage---it means the :c:member:`~PyTypeObject.tp_clear` methods + of the participating objects have failed to break the reference cycle as + required. + +If :c:member:`~PyTypeObject.tp_clear` did not exist, then Python would have no +way to safely break a reference cycle. Simply destroying an object in a cyclic +isolate would result in a dangling pointer, triggering undefined behavior when +an object referencing the destroyed object is itself destroyed. The clearing +step makes object destruction a two-phase process: first +:c:member:`~PyTypeObject.tp_clear` is called to partially destroy the objects +enough to detangle them from each other, then +:c:member:`~PyTypeObject.tp_dealloc` is called to complete the destruction. + +Unlike clearing, finalization is not a phase of destruction. A finalized +object must still behave properly by continuing to fulfill its design +contracts. An object's finalizer is allowed to execute arbitrary Python code, +and is even allowed to prevent the impending destruction by adding a reference. +The finalizer is only related to destruction by call order---if it runs, it runs +before destruction, which starts with :c:member:`~PyTypeObject.tp_clear` (if +called) and concludes with :c:member:`~PyTypeObject.tp_dealloc`. + +The finalization step is not necessary to safely reclaim the objects in a +cyclic isolate, but its existence makes it easier to design types that behave +in a sane manner when objects are cleared. 
Clearing an object might +necessarily leave it in a broken, partially destroyed state---it might be +unsafe to call any of the cleared object's methods or access any of its +attributes. With finalization, only finalized objects can possibly interact +with cleared objects; non-finalized objects are guaranteed to interact with +only non-cleared (but potentially finalized) objects. + +To summarize the possible interactions: + +* A non-finalized object might have references to or from non-finalized and + finalized objects, but not to or from cleared objects. +* A finalized object might have references to or from non-finalized, finalized, + and cleared objects. +* A cleared object might have references to or from finalized and cleared + objects, but not to or from non-finalized objects. + +Without any reference cycles, an object can be simply destroyed once its last +reference is deleted; the finalization and clearing steps are not necessary to +safely reclaim unused objects. However, it can be useful to automatically call +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` +before destruction anyway because type design is simplified when all objects +always experience the same series of events regardless of whether they +participated in a cyclic isolate. Python currently only calls +:c:member:`~PyTypeObject.tp_finalize` and :c:member:`~PyTypeObject.tp_clear` as +needed to destroy a cyclic isolate; this may change in a future version. + + +Functions +--------- + +To allocate and free memory, see :ref:`allocating-objects`. + + +.. c:function:: void PyObject_CallFinalizer(PyObject *op) + + Finalizes the object as described in :c:member:`~PyTypeObject.tp_finalize`. + Call this function (or :c:func:`PyObject_CallFinalizerFromDealloc`) instead + of calling :c:member:`~PyTypeObject.tp_finalize` directly because this + function may deduplicate multiple calls to :c:member:`!tp_finalize`. 
+ Currently, calls are only deduplicated if the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set); this may + change in the future. + + +.. c:function:: int PyObject_CallFinalizerFromDealloc(PyObject *op) + + Same as :c:func:`PyObject_CallFinalizer` but meant to be called at the + beginning of the object's destructor (:c:member:`~PyTypeObject.tp_dealloc`). + There must not be any references to the object. If the object's finalizer + resurrects the object, this function returns -1; no further destruction + should happen. Otherwise, this function returns 0 and destruction can + continue normally. + + .. seealso:: + + :c:member:`~PyTypeObject.tp_dealloc` for example code. diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 64ae35daa70..61fa49f8681 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -376,6 +376,24 @@ The :ref:`default object allocator ` uses the If *p* is ``NULL``, no operation is performed. + Do not call this directly to free an object's memory; call the type's + :c:member:`~PyTypeObject.tp_free` slot instead. + + Do not use this for memory allocated by :c:macro:`PyObject_GC_New` or + :c:macro:`PyObject_GC_NewVar`; use :c:func:`PyObject_GC_Del` instead. + + .. seealso:: + + * :c:func:`PyObject_GC_Del` is the equivalent of this function for memory + allocated by types that support garbage collection. + * :c:func:`PyObject_Malloc` + * :c:func:`PyObject_Realloc` + * :c:func:`PyObject_Calloc` + * :c:macro:`PyObject_New` + * :c:macro:`PyObject_NewVar` + * :c:func:`PyType_GenericAlloc` + * :c:member:`~PyTypeObject.tp_free` + .. _default-memory-allocators: diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index bef3a79ccd0..0fd159f1eb8 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -613,6 +613,38 @@ Object Protocol .. versionadded:: 3.14 +.. c:function:: int PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *obj) + + Check if *obj* is a unique temporary object. 
+ Returns ``1`` if *obj* is known to be a unique temporary object, + and ``0`` otherwise. This function cannot fail, but the check is + conservative, and may return ``0`` in some cases even if *obj* is a unique + temporary object. + + If an object is a unique temporary, it is guaranteed that the current code + has the only reference to the object. For arguments to C functions, this + should be used instead of checking if the reference count is ``1``. Starting + with Python 3.14, the interpreter internally avoids some reference count + modifications when loading objects onto the operands stack by + :term:`borrowing <borrowed reference>` references when possible, which means + that a reference count of ``1`` by itself does not guarantee that a function + argument is uniquely referenced. + + In the example below, ``my_func`` is called with a unique temporary object + as its argument:: + + my_func([1, 2, 3]) + + In the example below, ``my_func`` is **not** called with a unique temporary + object as its argument, even if its refcount is ``1``:: + + my_list = [1, 2, 3] + my_func(my_list) + + See also the function :c:func:`Py_REFCNT`. + + .. versionadded:: 3.14 + .. c:function:: int PyUnstable_IsImmortal(PyObject *obj) This function returns non-zero if *obj* is :term:`immortal`, and zero @@ -705,3 +737,21 @@ Object Protocol caller must hold a :term:`strong reference` to *obj* when calling this. .. versionadded:: 3.14 + +.. c:function:: int PyUnstable_Object_IsUniquelyReferenced(PyObject *op) + + Determine if *op* only has one reference. + + On GIL-enabled builds, this function is equivalent to + :c:expr:`Py_REFCNT(op) == 1`. + + On a :term:`free threaded <free threading>` build, this checks if *op*'s + :term:`reference count` is equal to one and additionally checks if *op* + is only used by this thread. :c:expr:`Py_REFCNT(op) == 1` is **not** + thread-safe on free threaded builds; prefer this function.
+ + The caller must hold an :term:`attached thread state`, despite the fact + that this function doesn't call into the Python interpreter. This function + cannot fail. + + .. versionadded:: 3.14 diff --git a/Doc/c-api/objimpl.rst b/Doc/c-api/objimpl.rst index 8bd8c107c98..83de4248039 100644 --- a/Doc/c-api/objimpl.rst +++ b/Doc/c-api/objimpl.rst @@ -12,6 +12,7 @@ object types. .. toctree:: allocation.rst + lifecycle.rst structures.rst typeobj.rst gcsupport.rst diff --git a/Doc/c-api/refcounting.rst b/Doc/c-api/refcounting.rst index d75dad737bc..b23f016f9b0 100644 --- a/Doc/c-api/refcounting.rst +++ b/Doc/c-api/refcounting.rst @@ -23,6 +23,15 @@ of Python objects. Use the :c:func:`Py_SET_REFCNT()` function to set an object reference count. + .. note:: + + On :term:`free threaded <free threading>` builds of Python, returning 1 + isn't sufficient to determine if it's safe to treat *o* as having no + access by other threads. Use :c:func:`PyUnstable_Object_IsUniquelyReferenced` + for that instead. + + See also the function :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary()`. + .. versionchanged:: 3.10 :c:func:`Py_REFCNT()` is changed to the inline static function. diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst index ec2867b0ce0..2176b8e492f 100644 --- a/Doc/c-api/type.rst +++ b/Doc/c-api/type.rst @@ -151,14 +151,29 @@ Type Objects .. c:function:: PyObject* PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) - Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type object. Use - Python's default memory allocation mechanism to allocate a new instance and - initialize all its contents to ``NULL``. + Generic handler for the :c:member:`~PyTypeObject.tp_alloc` slot of a type + object. Uses Python's default memory allocation mechanism to allocate memory + for a new instance, zeros the memory, then initializes the memory as if by + calling :c:func:`PyObject_Init` or :c:func:`PyObject_InitVar`.
+ + Do not call this directly to allocate memory for an object; call the type's + :c:member:`~PyTypeObject.tp_alloc` slot instead. + + For types that support garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set), this function behaves like + :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar` (except the + memory is guaranteed to be zeroed before initialization), and should be + paired with :c:func:`PyObject_GC_Del` in :c:member:`~PyTypeObject.tp_free`. + Otherwise, it behaves like :c:macro:`PyObject_New` or + :c:macro:`PyObject_NewVar` (except the memory is guaranteed to be zeroed + before initialization) and should be paired with :c:func:`PyObject_Free` in + :c:member:`~PyTypeObject.tp_free`. .. c:function:: PyObject* PyType_GenericNew(PyTypeObject *type, PyObject *args, PyObject *kwds) - Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type object. Create a - new instance using the type's :c:member:`~PyTypeObject.tp_alloc` slot. + Generic handler for the :c:member:`~PyTypeObject.tp_new` slot of a type + object. Creates a new instance using the type's + :c:member:`~PyTypeObject.tp_alloc` slot and returns the resulting object. .. 
c:function:: int PyType_Ready(PyTypeObject *type) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 9d551aa997b..5df0c0fe608 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -79,7 +79,7 @@ Quick Reference | :c:member:`~PyTypeObject.tp_setattro` | :c:type:`setattrofunc` | __setattr__, | X | X | | G | | | | __delattr__ | | | | | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ - | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | | | | | % | + | :c:member:`~PyTypeObject.tp_as_buffer` | :c:type:`PyBufferProcs` * | :ref:`sub-slots` | | | | % | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ | :c:member:`~PyTypeObject.tp_flags` | unsigned long | | X | X | | ? | +------------------------------------------------+-----------------------------------+-------------------+---+---+---+---+ @@ -325,9 +325,10 @@ sub-slots +---------------------------------------------------------+-----------------------------------+---------------+ | | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | | + | :c:member:`~PyBufferProcs.bf_getbuffer` | :c:func:`getbufferproc` | __buffer__ | +---------------------------------------------------------+-----------------------------------+---------------+ - | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | | + | :c:member:`~PyBufferProcs.bf_releasebuffer` | :c:func:`releasebufferproc` | __release_\ | + | | | buffer\__ | +---------------------------------------------------------+-----------------------------------+---------------+ .. _slot-typedefs-table: @@ -611,7 +612,7 @@ and :c:data:`PyType_Type` effectively act as defaults.) 
Note that the :c:member:`~PyVarObject.ob_size` field may later be used for other purposes. For example, :py:type:`int` instances use the bits of :c:member:`~PyVarObject.ob_size` in an implementation-defined - way; the underlying storage and its size should be acessed using + way; the underlying storage and its size should be accessed using :c:func:`PyLong_Export`. .. note:: @@ -676,72 +677,122 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_dealloc - A pointer to the instance destructor function. This function must be defined - unless the type guarantees that its instances will never be deallocated (as is - the case for the singletons ``None`` and ``Ellipsis``). The function signature is:: + A pointer to the instance destructor function. The function signature is:: void tp_dealloc(PyObject *self); - The destructor function is called by the :c:func:`Py_DECREF` and - :c:func:`Py_XDECREF` macros when the new reference count is zero. At this point, - the instance is still in existence, but there are no references to it. The - destructor function should free all references which the instance owns, free all - memory buffers owned by the instance (using the freeing function corresponding - to the allocation function used to allocate the buffer), and call the type's - :c:member:`~PyTypeObject.tp_free` function. If the type is not subtypable - (doesn't have the :c:macro:`Py_TPFLAGS_BASETYPE` flag bit set), it is - permissible to call the object deallocator directly instead of via - :c:member:`~PyTypeObject.tp_free`. The object deallocator should be the one used to allocate the - instance; this is normally :c:func:`PyObject_Free` if the instance was allocated - using :c:macro:`PyObject_New` or :c:macro:`PyObject_NewVar`, or - :c:func:`PyObject_GC_Del` if the instance was allocated using - :c:macro:`PyObject_GC_New` or :c:macro:`PyObject_GC_NewVar`. 
+ The destructor function should remove all references which the instance owns + (e.g., call :c:func:`Py_CLEAR`), free all memory buffers owned by the + instance, and call the type's :c:member:`~PyTypeObject.tp_free` function to + free the object itself. - If the type supports garbage collection (has the :c:macro:`Py_TPFLAGS_HAVE_GC` - flag bit set), the destructor should call :c:func:`PyObject_GC_UnTrack` + No guarantees are made about when an object is destroyed, except: + + * Python will destroy an object immediately or some time after the final + reference to the object is deleted, unless its finalizer + (:c:member:`~PyTypeObject.tp_finalize`) subsequently resurrects the + object. + * An object will not be destroyed while it is being automatically finalized + (:c:member:`~PyTypeObject.tp_finalize`) or automatically cleared + (:c:member:`~PyTypeObject.tp_clear`). + + CPython currently destroys an object immediately from :c:func:`Py_DECREF` + when the new reference count is zero, but this may change in a future + version. + + It is recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is always + finalized before destruction. + + If the type supports garbage collection (the :c:macro:`Py_TPFLAGS_HAVE_GC` + flag is set), the destructor should call :c:func:`PyObject_GC_UnTrack` before clearing any member fields. - .. code-block:: c + It is permissible to call :c:member:`~PyTypeObject.tp_clear` from + :c:member:`!tp_dealloc` to reduce code duplication and to guarantee that the + object is always cleared before destruction. Beware that + :c:member:`!tp_clear` might have already been called. - static void foo_dealloc(foo_object *self) { + If the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the + deallocator should release the owned reference to its type object (via + :c:func:`Py_DECREF`) after calling the type deallocator. 
See the example + code below.:: + + static void + foo_dealloc(PyObject *op) + { + foo_object *self = (foo_object *) op; PyObject_GC_UnTrack(self); Py_CLEAR(self->ref); - Py_TYPE(self)->tp_free((PyObject *)self); - } + Py_TYPE(self)->tp_free(self); + } - Finally, if the type is heap allocated (:c:macro:`Py_TPFLAGS_HEAPTYPE`), the - deallocator should release the owned reference to its type object - (via :c:func:`Py_DECREF`) after - calling the type deallocator. In order to avoid dangling pointers, the - recommended way to achieve this is: + :c:member:`!tp_dealloc` must leave the exception status unchanged. If it + needs to call something that might raise an exception, the exception state + must be backed up first and restored later (after logging any exceptions + with :c:func:`PyErr_WriteUnraisable`). - .. code-block:: c + Example:: - static void foo_dealloc(foo_object *self) { - PyTypeObject *tp = Py_TYPE(self); - // free references and buffers here - tp->tp_free(self); - Py_DECREF(tp); - } + static void + foo_dealloc(PyObject *self) + { + PyObject *exc = PyErr_GetRaisedException(); - .. warning:: + if (PyObject_CallFinalizerFromDealloc(self) < 0) { + // self was resurrected. + goto done; + } - In a garbage collected Python, :c:member:`!tp_dealloc` may be called from - any Python thread, not just the thread which created the object (if the - object becomes part of a refcount cycle, that cycle might be collected by - a garbage collection on any thread). This is not a problem for Python - API calls, since the thread on which :c:member:`!tp_dealloc` is called - with an :term:`attached thread state`. However, if the object being - destroyed in turn destroys objects from some other C or C++ library, care - should be taken to ensure that destroying those objects on the thread - which called :c:member:`!tp_dealloc` will not violate any assumptions of - the library. 
+ PyTypeObject *tp = Py_TYPE(self); + + if (tp->tp_flags & Py_TPFLAGS_HAVE_GC) { + PyObject_GC_UnTrack(self); + } + + // Optional, but convenient to avoid code duplication. + if (tp->tp_clear && tp->tp_clear(self) < 0) { + PyErr_WriteUnraisable(self); + } + + // Any additional destruction goes here. + + tp->tp_free(self); + self = NULL; // In case PyErr_WriteUnraisable() is called below. + + if (tp->tp_flags & Py_TPFLAGS_HEAPTYPE) { + Py_CLEAR(tp); + } + + done: + // Optional, if something was called that might have raised an + // exception. + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(self); + } + PyErr_SetRaisedException(exc); + } + + :c:member:`!tp_dealloc` may be called from + any Python thread, not just the thread which created the object (if the + object becomes part of a refcount cycle, that cycle might be collected by + a garbage collection on any thread). This is not a problem for Python + API calls, since the thread on which :c:member:`!tp_dealloc` is called + has an :term:`attached thread state`. However, if the object being + destroyed in turn destroys objects from some other C library, care + should be taken to ensure that destroying those objects on the thread + which called :c:member:`!tp_dealloc` will not violate any assumptions of + the library. **Inheritance:** This field is inherited by subtypes. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: Py_ssize_t PyTypeObject.tp_vectorcall_offset @@ -1132,11 +1183,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:macro:: Py_TPFLAGS_HAVE_GC This bit is set when the object supports garbage collection. If this bit - is set, instances must be created using :c:macro:`PyObject_GC_New` and - destroyed using :c:func:`PyObject_GC_Del`. More information in section - :ref:`supporting-cycle-detection`.
This bit also implies that the - GC-related fields :c:member:`~PyTypeObject.tp_traverse` and :c:member:`~PyTypeObject.tp_clear` are present in - the type object. + is set, memory for new instances (see :c:member:`~PyTypeObject.tp_alloc`) + must be allocated using :c:macro:`PyObject_GC_New` or + :c:func:`PyType_GenericAlloc` and deallocated (see + :c:member:`~PyTypeObject.tp_free`) using :c:func:`PyObject_GC_Del`. More + information in section :ref:`supporting-cycle-detection`. **Inheritance:** @@ -1416,8 +1467,9 @@ and :c:data:`PyType_Type` effectively act as defaults.) :mod:`!_thread` extension module:: static int - local_traverse(localobject *self, visitproc visit, void *arg) + local_traverse(PyObject *op, visitproc visit, void *arg) { + localobject *self = (localobject *) op; Py_VISIT(self->args); Py_VISIT(self->kw); Py_VISIT(self->dict); @@ -1472,6 +1524,11 @@ and :c:data:`PyType_Type` effectively act as defaults.) heap-allocated superclass). If they do not, the type object may not be garbage-collected. + .. note:: + + The :c:member:`~PyTypeObject.tp_traverse` function can be called from any + thread. + .. versionchanged:: 3.9 Heap-allocated types are expected to visit ``Py_TYPE(self)`` in @@ -1491,28 +1548,110 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: inquiry PyTypeObject.tp_clear - An optional pointer to a clear function for the garbage collector. This is only - used if the :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit is set. The signature is:: + An optional pointer to a clear function. The signature is:: int tp_clear(PyObject *); - The :c:member:`~PyTypeObject.tp_clear` member function is used to break reference cycles in cyclic - garbage detected by the garbage collector. Taken together, all :c:member:`~PyTypeObject.tp_clear` - functions in the system must combine to break all reference cycles. This is - subtle, and if in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. 
For example, - the tuple type does not implement a :c:member:`~PyTypeObject.tp_clear` function, because it's - possible to prove that no reference cycle can be composed entirely of tuples. - Therefore the :c:member:`~PyTypeObject.tp_clear` functions of other types must be sufficient to - break any cycle containing a tuple. This isn't immediately obvious, and there's - rarely a good reason to avoid implementing :c:member:`~PyTypeObject.tp_clear`. + The purpose of this function is to break reference cycles that are causing a + :term:`cyclic isolate` so that the objects can be safely destroyed. A + cleared object is a partially destroyed object; the object is not obligated + to satisfy design invariants held during normal use. + + :c:member:`!tp_clear` does not need to delete references to objects that + can't participate in reference cycles, such as Python strings or Python + integers. However, it may be convenient to clear all references, and write + the type's :c:member:`~PyTypeObject.tp_dealloc` function to invoke + :c:member:`!tp_clear` to avoid code duplication. (Beware that + :c:member:`!tp_clear` might have already been called. Prefer calling + idempotent functions like :c:func:`Py_CLEAR`.) + + Any non-trivial cleanup should be performed in + :c:member:`~PyTypeObject.tp_finalize` instead of :c:member:`!tp_clear`. + + .. note:: + + If :c:member:`!tp_clear` fails to break a reference cycle then the + objects in the :term:`cyclic isolate` may remain indefinitely + uncollectable ("leak"). See :data:`gc.garbage`. + + .. note:: + + Referents (direct and indirect) might have already been cleared; they are + not guaranteed to be in a consistent state. + + .. note:: + + The :c:member:`~PyTypeObject.tp_clear` function can be called from any + thread. + + .. note:: + + An object is not guaranteed to be automatically cleared before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. 
+ + This function differs from the destructor + (:c:member:`~PyTypeObject.tp_dealloc`) in the following ways: + + * The purpose of clearing an object is to remove references to other objects + that might participate in a reference cycle. The purpose of the + destructor, on the other hand, is a superset: it must release *all* + resources it owns, including references to objects that cannot participate + in a reference cycle (e.g., integers) as well as the object's own memory + (by calling :c:member:`~PyTypeObject.tp_free`). + * When :c:member:`!tp_clear` is called, other objects might still hold + references to the object being cleared. Because of this, + :c:member:`!tp_clear` must not deallocate the object's own memory + (:c:member:`~PyTypeObject.tp_free`). The destructor, on the other hand, + is only called when no (strong) references exist, and as such, must + safely destroy the object itself by deallocating it. + * :c:member:`!tp_clear` might never be automatically called. An object's + destructor, on the other hand, will be automatically called some time + after the object becomes unreachable (i.e., either there are no references + to the object or the object is a member of a :term:`cyclic isolate`). + + No guarantees are made about when, if, or how often Python automatically + clears an object, except: + + * Python will not automatically clear an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically clear an object if it has not been + automatically finalized (see :c:member:`~PyTypeObject.tp_finalize`). (If + the finalizer resurrected the object, the object may or may not be + automatically finalized again before it is cleared.) + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically clear it if any member of the cyclic isolate has not yet + been automatically finalized (:c:member:`~PyTypeObject.tp_finalize`). 
+ * Python will not destroy an object until after any automatic calls to its + :c:member:`!tp_clear` function have returned. This ensures that the act + of breaking a reference cycle does not invalidate the ``self`` pointer + while :c:member:`!tp_clear` is still executing. + * Python will not automatically call :c:member:`!tp_clear` multiple times + concurrently. + + CPython currently only automatically clears objects as needed to break + reference cycles in a :term:`cyclic isolate`, but future versions might + clear objects regularly before their destruction. + + Taken together, all :c:member:`~PyTypeObject.tp_clear` functions in the + system must combine to break all reference cycles. This is subtle, and if + in any doubt supply a :c:member:`~PyTypeObject.tp_clear` function. For + example, the tuple type does not implement a + :c:member:`~PyTypeObject.tp_clear` function, because it's possible to prove + that no reference cycle can be composed entirely of tuples. Therefore the + :c:member:`~PyTypeObject.tp_clear` functions of other types are responsible + for breaking any cycle containing a tuple. This isn't immediately obvious, + and there's rarely a good reason to avoid implementing + :c:member:`~PyTypeObject.tp_clear`. Implementations of :c:member:`~PyTypeObject.tp_clear` should drop the instance's references to those of its members that may be Python objects, and set its pointers to those members to ``NULL``, as in the following example:: static int - local_clear(localobject *self) + local_clear(PyObject *op) { + localobject *self = (localobject *) op; Py_CLEAR(self->key); Py_CLEAR(self->args); Py_CLEAR(self->kw); @@ -1538,18 +1677,6 @@ and :c:data:`PyType_Type` effectively act as defaults.) PyObject_ClearManagedDict((PyObject*)self); - Note that :c:member:`~PyTypeObject.tp_clear` is not *always* called - before an instance is deallocated. 
For example, when reference counting - is enough to determine that an object is no longer used, the cyclic garbage - collector is not involved and :c:member:`~PyTypeObject.tp_dealloc` is - called directly. - - Because the goal of :c:member:`~PyTypeObject.tp_clear` functions is to break reference cycles, - it's not necessary to clear contained objects like Python strings or Python - integers, which can't participate in reference cycles. On the other hand, it may - be convenient to clear all contained Python objects, and write the type's - :c:member:`~PyTypeObject.tp_dealloc` function to invoke :c:member:`~PyTypeObject.tp_clear`. - More information about Python's garbage collection scheme can be found in section :ref:`supporting-cycle-detection`. @@ -1562,6 +1689,10 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:member:`~PyTypeObject.tp_clear` are all inherited from the base type if they are all zero in the subtype. + .. seealso:: + + :ref:`life-cycle` for details about how this slot relates to other slots. + .. c:member:: richcmpfunc PyTypeObject.tp_richcompare @@ -1938,18 +2069,17 @@ and :c:data:`PyType_Type` effectively act as defaults.) **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement). + Static subtypes inherit this slot, which will be + :c:func:`PyType_GenericAlloc` if inherited from :class:`object`. + + :ref:`Heap subtypes <heap-types>` do not inherit this slot. **Default:** - For dynamic subtypes, this field is always set to - :c:func:`PyType_GenericAlloc`, to force a standard heap - allocation strategy. + For heap subtypes, this field is always set to + :c:func:`PyType_GenericAlloc`. - For static subtypes, :c:data:`PyBaseObject_Type` uses - :c:func:`PyType_GenericAlloc`. That is the recommended value - for all statically defined types. + For static subtypes, this slot is inherited (see above). .. 
c:member:: newfunc PyTypeObject.tp_new @@ -1997,20 +2127,27 @@ and :c:data:`PyType_Type` effectively act as defaults.) void tp_free(void *self); - An initializer that is compatible with this signature is :c:func:`PyObject_Free`. + This function must free the memory allocated by + :c:member:`~PyTypeObject.tp_alloc`. **Inheritance:** - This field is inherited by static subtypes, but not by dynamic - subtypes (subtypes created by a class statement) + Static subtypes inherit this slot, which will be :c:func:`PyObject_Free` if + inherited from :class:`object`. Exception: If the type supports garbage + collection (i.e., the :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and it would inherit + :c:func:`PyObject_Free`, then this slot is not inherited but instead defaults + to :c:func:`PyObject_GC_Del`. + + :ref:`Heap subtypes <heap-types>` do not inherit this slot. **Default:** - In dynamic subtypes, this field is set to a deallocator suitable to - match :c:func:`PyType_GenericAlloc` and the value of the - :c:macro:`Py_TPFLAGS_HAVE_GC` flag bit. + For :ref:`heap subtypes <heap-types>`, this slot defaults to a deallocator suitable to match + :c:func:`PyType_GenericAlloc` and the value of the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag. - For static subtypes, :c:data:`PyBaseObject_Type` uses :c:func:`PyObject_Free`. + For static subtypes, this slot is inherited (see above). .. c:member:: inquiry PyTypeObject.tp_is_gc @@ -2137,29 +2274,138 @@ and :c:data:`PyType_Type` effectively act as defaults.) .. c:member:: destructor PyTypeObject.tp_finalize - An optional pointer to an instance finalization function. Its signature is:: + An optional pointer to an instance finalization function. This is the C + implementation of the :meth:`~object.__del__` special method. Its signature + is:: void tp_finalize(PyObject *self); - If :c:member:`~PyTypeObject.tp_finalize` is set, the interpreter calls it once when - finalizing an instance. 
It is called either from the garbage - collector (if the instance is part of an isolated reference cycle) or - just before the object is deallocated. Either way, it is guaranteed - to be called before attempting to break reference cycles, ensuring - that it finds the object in a sane state. + The primary purpose of finalization is to perform any non-trivial cleanup + that must be performed before the object is destroyed, while the object and + any other objects it directly or indirectly references are still in a + consistent state. The finalizer is allowed to execute + arbitrary Python code. - :c:member:`~PyTypeObject.tp_finalize` should not mutate the current exception status; - therefore, a recommended way to write a non-trivial finalizer is:: + Before Python automatically finalizes an object, some of the object's direct + or indirect referents might have themselves been automatically finalized. + However, none of the referents will have been automatically cleared + (:c:member:`~PyTypeObject.tp_clear`) yet. + + Other non-finalized objects might still be using a finalized object, so the + finalizer must leave the object in a sane state (e.g., invariants are still + met). + + .. note:: + + After Python automatically finalizes an object, Python might start + automatically clearing (:c:member:`~PyTypeObject.tp_clear`) the object + and its referents (direct and indirect). Cleared objects are not + guaranteed to be in a consistent state; a finalized object must be able + to tolerate cleared referents. + + .. note:: + + An object is not guaranteed to be automatically finalized before its + destructor (:c:member:`~PyTypeObject.tp_dealloc`) is called. It is + recommended to call :c:func:`PyObject_CallFinalizerFromDealloc` at the + beginning of :c:member:`!tp_dealloc` to guarantee that the object is + always finalized before destruction. + + .. 
note:: + + The :c:member:`~PyTypeObject.tp_finalize` function can be called from any + thread, although the :term:`GIL` will be held. + + .. note:: + + The :c:member:`!tp_finalize` function can be called during shutdown, + after some global variables have been deleted. See the documentation of + the :meth:`~object.__del__` method for details. + + When Python finalizes an object, it behaves like the following algorithm: + + #. Python might mark the object as *finalized*. Currently, Python always + marks objects whose type supports garbage collection (i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is set in + :c:member:`~PyTypeObject.tp_flags`) and never marks other types of + objects; this might change in a future version. + #. If the object is not marked as *finalized* and its + :c:member:`!tp_finalize` finalizer function is non-``NULL``, the + finalizer function is called. + #. If the finalizer function was called and the finalizer made the object + reachable (i.e., there is a reference to the object and it is not a + member of a :term:`cyclic isolate`), then the finalizer is said to have + *resurrected* the object. It is unspecified whether the finalizer can + also resurrect the object by adding a new reference to the object that + does not make it reachable, i.e., the object is (still) a member of a + cyclic isolate. + #. If the finalizer resurrected the object, the object's pending destruction + is canceled and the object's *finalized* mark might be removed if + present. Currently, Python never removes the *finalized* mark; this + might change in a future version. + + *Automatic finalization* refers to any finalization performed by Python + except via calls to :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. 
No guarantees are made about + when, if, or how often an object is automatically finalized, except: + + * Python will not automatically finalize an object if it is reachable, i.e., + there is a reference to it and it is not a member of a :term:`cyclic + isolate`. + * Python will not automatically finalize an object if finalizing it would + not mark the object as *finalized*. Currently, this applies to objects + whose type does not support garbage collection, i.e., the + :c:macro:`Py_TPFLAGS_HAVE_GC` flag is not set. Such objects can still be + manually finalized by calling :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc`. + * Python will not automatically finalize any two members of a :term:`cyclic + isolate` concurrently. + * Python will not automatically finalize an object after it has + automatically cleared (:c:member:`~PyTypeObject.tp_clear`) the object. + * If an object is a member of a :term:`cyclic isolate`, Python will not + automatically finalize it after automatically clearing (see + :c:member:`~PyTypeObject.tp_clear`) any other member. + * Python will automatically finalize every member of a :term:`cyclic + isolate` before it automatically clears (see + :c:member:`~PyTypeObject.tp_clear`) any of them. + * If Python is going to automatically clear an object + (:c:member:`~PyTypeObject.tp_clear`), it will automatically finalize the + object first. + + Python currently only automatically finalizes objects that are members of a + :term:`cyclic isolate`, but future versions might finalize objects regularly + before their destruction. + + To manually finalize an object, do not call this function directly; call + :c:func:`PyObject_CallFinalizer` or + :c:func:`PyObject_CallFinalizerFromDealloc` instead. + + :c:member:`~PyTypeObject.tp_finalize` should leave the current exception + status unchanged. 
The recommended way to write a non-trivial finalizer is + to back up the exception at the beginning by calling + :c:func:`PyErr_GetRaisedException` and restore the exception at the end by + calling :c:func:`PyErr_SetRaisedException`. If an exception is encountered + in the middle of the finalizer, log and clear it with + :c:func:`PyErr_WriteUnraisable` or :c:func:`PyErr_FormatUnraisable`. For + example:: static void - local_finalize(PyObject *self) + foo_finalize(PyObject *self) { - /* Save the current exception, if any. */ + // Save the current exception, if any. PyObject *exc = PyErr_GetRaisedException(); - /* ... */ + // ... - /* Restore the saved exception. */ + if (do_something_that_might_raise() != success_indicator) { + PyErr_WriteUnraisable(self); + goto done; + } + + done: + // Restore the saved exception. This silently discards any exception + // raised above, so be sure to call PyErr_WriteUnraisable first if + // necessary. PyErr_SetRaisedException(exc); } @@ -2175,7 +2421,13 @@ and :c:data:`PyType_Type` effectively act as defaults.) :c:macro:`Py_TPFLAGS_HAVE_FINALIZE` flags bit in order for this field to be used. This is no longer required. - .. seealso:: "Safe object finalization" (:pep:`442`) + .. seealso:: + + * :pep:`442`: "Safe object finalization" + * :ref:`life-cycle` for details about how this slot relates to other + slots. + * :c:func:`PyObject_CallFinalizer` + * :c:func:`PyObject_CallFinalizerFromDealloc` .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 31063962ae5..cdd90d05b70 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -33,8 +33,14 @@ Python: .. c:var:: PyTypeObject PyUnicode_Type - This instance of :c:type:`PyTypeObject` represents the Python Unicode type. It - is exposed to Python code as :py:class:`str`. + This instance of :c:type:`PyTypeObject` represents the Python Unicode type. + It is exposed to Python code as :py:class:`str`. + + +.. 
c:var:: PyTypeObject PyUnicodeIter_Type + + This instance of :c:type:`PyTypeObject` represents the Python Unicode + iterator type. It is used to iterate over Unicode string objects. .. c:type:: Py_UCS4 @@ -596,6 +602,14 @@ APIs: Objects other than Unicode or its subtypes will cause a :exc:`TypeError`. +.. c:function:: PyObject* PyUnicode_FromOrdinal(int ordinal) + + Create a Unicode object from the given Unicode code point *ordinal*. + + The ordinal must be in ``range(0x110000)``. A :exc:`ValueError` is + raised in the case it is not. + + .. c:function:: PyObject* PyUnicode_FromEncodedObject(PyObject *obj, \ const char *encoding, const char *errors) @@ -622,7 +636,7 @@ APIs: On error, set *\*p_left* to ``NULL`` and set an exception. - On sucess, set *\*p_left* to a new strong reference to the result. + On success, set *\*p_left* to a new strong reference to the result. .. c:function:: void PyUnicode_AppendAndDel(PyObject **p_left, PyObject *right) @@ -631,6 +645,17 @@ APIs: difference being that it decrements the reference count of *right* by one. +.. c:function:: PyObject* PyUnicode_BuildEncodingMap(PyObject* string) + + Return a mapping suitable for decoding a custom single-byte encoding. + Given a Unicode string *string* of up to 256 characters representing an encoding + table, returns either a compact internal mapping object or a dictionary + mapping character ordinals to byte values. Raises a :exc:`TypeError` and + returns ``NULL`` on invalid input. + + .. versionadded:: 3.2 + + .. c:function:: const char* PyUnicode_GetDefaultEncoding(void) Return the name of the default string encoding, ``"utf-8"``. @@ -666,6 +691,21 @@ APIs: .. versionadded:: 3.3 +.. c:function:: int PyUnicode_Resize(PyObject **unicode, Py_ssize_t length) + + Resize a Unicode object *\*unicode* to the new *length* in code points. + + Try to resize the string in place (which is usually faster than allocating + a new string and copying characters), or create a new string. 
+ + *\*unicode* is modified to point to the new (resized) object and ``0`` is + returned on success. Otherwise, ``-1`` is returned and an exception is set, + and *\*unicode* is left untouched. + + The function doesn't check string content; the result may not be a + string in canonical representation. + + .. c:function:: Py_ssize_t PyUnicode_Fill(PyObject *unicode, Py_ssize_t start, \ Py_ssize_t length, Py_UCS4 fill_char) @@ -1003,6 +1043,17 @@ generic ones are documented for simplicity. Generic Codecs """""""""""""" +The following macro is provided: + + +.. c:macro:: Py_UNICODE_REPLACEMENT_CHARACTER + + The Unicode code point ``U+FFFD`` (replacement character). + + This Unicode character is used as the replacement character during + decoding if the *errors* argument is set to "replace". + + These are the generic codec APIs: diff --git a/Doc/conf.py b/Doc/conf.py index 467961dd5e2..7fadad66cb3 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -308,7 +308,6 @@ ('py:attr', '__annotations__'), ('py:meth', '__missing__'), ('py:attr', '__wrapped__'), - ('py:attr', 'decimal.Context.clamp'), ('py:meth', 'index'), # list.index, tuple.index, etc. ] diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 14629fbff0f..59b31ccf7bc 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -1093,9 +1093,6 @@ PyImport_ImportModuleLevelObject:PyObject*:locals:0:??? PyImport_ImportModuleLevelObject:PyObject*:fromlist:0:??? 
PyImport_ImportModuleLevelObject:int:level:: -PyImport_ImportModuleNoBlock:PyObject*::+1: -PyImport_ImportModuleNoBlock:const char*:name:: - PyImport_ReloadModule:PyObject*::+1: PyImport_ReloadModule:PyObject*:m:0: @@ -2770,6 +2767,9 @@ PyUnicode_FromFormatV:PyObject*::+1: PyUnicode_FromFormatV:const char*:format:: PyUnicode_FromFormatV:va_list:args:: +PyUnicode_FromOrdinal:PyObject*::+1: +PyUnicode_FromOrdinal:int:ordinal:: + PyUnicode_Append:void::: PyUnicode_Append:PyObject**:p_left:0: PyUnicode_Append:PyObject*:right:: @@ -2778,6 +2778,9 @@ PyUnicode_AppendAndDel:void::: PyUnicode_AppendAndDel:PyObject**:p_left:0: PyUnicode_AppendAndDel:PyObject*:right:-1: +PyUnicode_BuildEncodingMap:PyObject*::+1: +PyUnicode_BuildEncodingMap:PyObject*:string::: + PyUnicode_GetDefaultEncoding:const char*::: PyUnicode_GetDefaultEncoding::void:: @@ -2791,6 +2794,10 @@ PyUnicode_CopyCharacters:PyObject*:from:0: PyUnicode_CopyCharacters:Py_ssize_t:from_start:: PyUnicode_CopyCharacters:Py_ssize_t:how_many:: +PyUnicode_Resize:int::: +PyUnicode_Resize:PyObject**:unicode:0: +PyUnicode_Resize:Py_ssize_t:length:: + PyUnicode_Fill:Py_ssize_t::: PyUnicode_Fill:PyObject*:unicode:0: PyUnicode_Fill:Py_ssize_t:start:: @@ -3000,18 +3007,8 @@ Py_GetCompiler:const char*::: Py_GetCopyright:const char*::: -Py_GetExecPrefix:wchar_t*::: - -Py_GetPath:wchar_t*::: - Py_GetPlatform:const char*::: -Py_GetPrefix:wchar_t*::: - -Py_GetProgramFullPath:wchar_t*::: - -Py_GetProgramName:wchar_t*::: - Py_GetVersion:const char*::: Py_INCREF:void::: diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index c15f82603aa..e71a40e55e9 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -323,7 +323,6 @@ func,PyImport_ImportFrozenModuleObject,3.7,, func,PyImport_ImportModule,3.2,, func,PyImport_ImportModuleLevel,3.2,, func,PyImport_ImportModuleLevelObject,3.7,, -func,PyImport_ImportModuleNoBlock,3.2,, func,PyImport_ReloadModule,3.2,, func,PyIndex_Check,3.8,, 
type,PyInterpreterState,3.2,,opaque @@ -362,6 +361,7 @@ func,PyLong_AsLong,3.2,, func,PyLong_AsLongAndOverflow,3.2,, func,PyLong_AsLongLong,3.2,, func,PyLong_AsLongLongAndOverflow,3.2,, +func,PyLong_AsNativeBytes,3.14,, func,PyLong_AsSize_t,3.2,, func,PyLong_AsSsize_t,3.2,, func,PyLong_AsUInt32,3.14,, @@ -376,6 +376,7 @@ func,PyLong_FromInt32,3.14,, func,PyLong_FromInt64,3.14,, func,PyLong_FromLong,3.2,, func,PyLong_FromLongLong,3.2,, +func,PyLong_FromNativeBytes,3.14,, func,PyLong_FromSize_t,3.2,, func,PyLong_FromSsize_t,3.2,, func,PyLong_FromString,3.2,, @@ -383,6 +384,7 @@ func,PyLong_FromUInt32,3.14,, func,PyLong_FromUInt64,3.14,, func,PyLong_FromUnsignedLong,3.2,, func,PyLong_FromUnsignedLongLong,3.2,, +func,PyLong_FromUnsignedNativeBytes,3.14,, func,PyLong_FromVoidPtr,3.2,, func,PyLong_GetInfo,3.2,, data,PyLong_Type,3.2,, @@ -737,11 +739,7 @@ func,PyUnicode_Append,3.2,, func,PyUnicode_AppendAndDel,3.2,, func,PyUnicode_AsASCIIString,3.2,, func,PyUnicode_AsCharmapString,3.2,, -func,PyUnicode_AsDecodedObject,3.2,, -func,PyUnicode_AsDecodedUnicode,3.2,, -func,PyUnicode_AsEncodedObject,3.2,, func,PyUnicode_AsEncodedString,3.2,, -func,PyUnicode_AsEncodedUnicode,3.2,, func,PyUnicode_AsLatin1String,3.2,, func,PyUnicode_AsMBCSString,3.7,on Windows, func,PyUnicode_AsRawUnicodeEscapeString,3.2,, @@ -859,13 +857,7 @@ func,Py_GetCompiler,3.2,, func,Py_GetConstant,3.13,, func,Py_GetConstantBorrowed,3.13,, func,Py_GetCopyright,3.2,, -func,Py_GetExecPrefix,3.2,, -func,Py_GetPath,3.2,, func,Py_GetPlatform,3.2,, -func,Py_GetPrefix,3.2,, -func,Py_GetProgramFullPath,3.2,, -func,Py_GetProgramName,3.2,, -func,Py_GetPythonHome,3.2,, func,Py_GetRecursionLimit,3.2,, func,Py_GetVersion,3.2,, data,Py_HasFileSystemDefaultEncoding,3.2,, diff --git a/Doc/deprecations/c-api-pending-removal-in-3.15.rst b/Doc/deprecations/c-api-pending-removal-in-3.15.rst index 666a1622dd0..b87f0a5ecde 100644 --- a/Doc/deprecations/c-api-pending-removal-in-3.15.rst +++ 
b/Doc/deprecations/c-api-pending-removal-in-3.15.rst @@ -2,7 +2,7 @@ Pending removal in Python 3.15 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * The bundled copy of ``libmpdecimal``. -* The :c:func:`PyImport_ImportModuleNoBlock`: +* The :c:func:`!PyImport_ImportModuleNoBlock`: Use :c:func:`PyImport_ImportModule` instead. * :c:func:`PyWeakref_GetObject` and :c:func:`PyWeakref_GET_OBJECT`: Use :c:func:`PyWeakref_GetRef` instead. The `pythoncapi-compat project @@ -10,29 +10,39 @@ Pending removal in Python 3.15 :c:func:`PyWeakref_GetRef` on Python 3.12 and older. * :c:type:`Py_UNICODE` type and the :c:macro:`!Py_UNICODE_WIDE` macro: Use :c:type:`wchar_t` instead. +* :c:func:`!PyUnicode_AsDecodedObject`: + Use :c:func:`PyCodec_Decode` instead. +* :c:func:`!PyUnicode_AsDecodedUnicode`: + Use :c:func:`PyCodec_Decode` instead. Note that some codecs (for example, "base64") + may return a type other than :class:`str`, such as :class:`bytes`. +* :c:func:`!PyUnicode_AsEncodedObject`: + Use :c:func:`PyCodec_Encode` instead. +* :c:func:`!PyUnicode_AsEncodedUnicode`: + Use :c:func:`PyCodec_Encode` instead. Note that some codecs (for example, "base64") + may return a type other than :class:`bytes`, such as :class:`str`. * Python initialization functions, deprecated in Python 3.13: - * :c:func:`Py_GetPath`: + * :c:func:`!Py_GetPath`: Use :c:func:`PyConfig_Get("module_search_paths") <PyConfig_Get>` (:data:`sys.path`) instead. - * :c:func:`Py_GetPrefix`: + * :c:func:`!Py_GetPrefix`: Use :c:func:`PyConfig_Get("base_prefix") <PyConfig_Get>` (:data:`sys.base_prefix`) instead. Use :c:func:`PyConfig_Get("prefix") <PyConfig_Get>` (:data:`sys.prefix`) if :ref:`virtual environments <venv-def>` need to be handled. - * :c:func:`Py_GetExecPrefix`: + * :c:func:`!Py_GetExecPrefix`: Use :c:func:`PyConfig_Get("base_exec_prefix") <PyConfig_Get>` (:data:`sys.base_exec_prefix`) instead. Use :c:func:`PyConfig_Get("exec_prefix") <PyConfig_Get>` (:data:`sys.exec_prefix`) if :ref:`virtual environments <venv-def>` need to be handled. 
- * :c:func:`Py_GetProgramFullPath`: + * :c:func:`!Py_GetProgramFullPath`: Use :c:func:`PyConfig_Get("executable") <PyConfig_Get>` (:data:`sys.executable`) instead. - * :c:func:`Py_GetProgramName`: + * :c:func:`!Py_GetProgramName`: Use :c:func:`PyConfig_Get("executable") <PyConfig_Get>` (:data:`sys.executable`) instead. - * :c:func:`Py_GetPythonHome`: + * :c:func:`!Py_GetPythonHome`: Use :c:func:`PyConfig_Get("home") <PyConfig_Get>` or the :envvar:`PYTHONHOME` environment variable instead. diff --git a/Doc/deprecations/c-api-pending-removal-in-future.rst b/Doc/deprecations/c-api-pending-removal-in-future.rst index 1003047344a..841d1b455b6 100644 --- a/Doc/deprecations/c-api-pending-removal-in-future.rst +++ b/Doc/deprecations/c-api-pending-removal-in-future.rst @@ -18,14 +18,6 @@ although there is currently no date scheduled for their removal. Use :c:func:`PyOS_AfterFork_Child` instead. * :c:func:`PySlice_GetIndicesEx`: Use :c:func:`PySlice_Unpack` and :c:func:`PySlice_AdjustIndices` instead. -* :c:func:`!PyUnicode_AsDecodedObject`: - Use :c:func:`PyCodec_Decode` instead. -* :c:func:`!PyUnicode_AsDecodedUnicode`: - Use :c:func:`PyCodec_Decode` instead. -* :c:func:`!PyUnicode_AsEncodedObject`: - Use :c:func:`PyCodec_Encode` instead. -* :c:func:`!PyUnicode_AsEncodedUnicode`: - Use :c:func:`PyCodec_Encode` instead. * :c:func:`PyUnicode_READY`: Unneeded since Python 3.12 * :c:func:`!PyErr_Display`: diff --git a/Doc/deprecations/index.rst b/Doc/deprecations/index.rst index bac6e3f18d4..d064f2bec42 100644 --- a/Doc/deprecations/index.rst +++ b/Doc/deprecations/index.rst @@ -5,6 +5,10 @@ Deprecations .. include:: pending-removal-in-3.16.rst +.. include:: pending-removal-in-3.17.rst + +.. include:: pending-removal-in-3.19.rst + .. include:: pending-removal-in-future.rst C API deprecations @@ -12,4 +16,6 @@ C API deprecations .. include:: c-api-pending-removal-in-3.15.rst +.. include:: c-api-pending-removal-in-3.18.rst + .. 
include:: c-api-pending-removal-in-future.rst diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index 6159fa48848..9aac10840a6 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -78,7 +78,7 @@ Pending removal in Python 3.14 :meth:`~pathlib.PurePath.relative_to`: passing additional arguments is deprecated. -* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:!pkgutil.get_loader` +* :mod:`pkgutil`: :func:`!pkgutil.find_loader` and :func:`!pkgutil.get_loader` now raise :exc:`DeprecationWarning`; use :func:`importlib.util.find_spec` instead. (Contributed by Nikita Sobolev in :gh:`97850`.) diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 7b32275ad86..a76d06cce12 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -20,7 +20,7 @@ Pending removal in Python 3.15 * :mod:`http.server`: - * The obsolete and rarely used :class:`~http.server.CGIHTTPRequestHandler` + * The obsolete and rarely used :class:`!CGIHTTPRequestHandler` has been deprecated since Python 3.13. No direct replacement exists. *Anything* is better than CGI to interface @@ -51,7 +51,7 @@ Pending removal in Python 3.15 * :mod:`platform`: - * :func:`~platform.java_ver` has been deprecated since Python 3.13. + * :func:`!platform.java_ver` has been deprecated since Python 3.13. This function is only useful for Jython support, has a confusing API, and is largely untested. @@ -85,6 +85,13 @@ Pending removal in Python 3.15 has been deprecated since Python 3.13. Use the class-based syntax or the functional syntax instead. + * When using the functional syntax of :class:`~typing.TypedDict`\s, failing + to pass a value to the *fields* parameter (``TD = TypedDict("TD")``) or + passing ``None`` (``TD = TypedDict("TD", None)``) has been deprecated + since Python 3.13. 
+ Use ``class TD(TypedDict): pass`` or ``TD = TypedDict("TD", {})`` + to create a TypedDict with zero fields. + * The :func:`typing.no_type_check_decorator` decorator function has been deprecated since Python 3.13. After eight years in the :mod:`typing` module, @@ -92,8 +99,7 @@ Pending removal in Python 3.15 * :mod:`wave`: - * The :meth:`~wave.Wave_read.getmark`, :meth:`!setmark`, - and :meth:`~wave.Wave_read.getmarkers` methods of + * The ``getmark()``, ``setmark()`` and ``getmarkers()`` methods of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes have been deprecated since Python 3.13. diff --git a/Doc/deprecations/pending-removal-in-3.16.rst b/Doc/deprecations/pending-removal-in-3.16.rst index b408a6d72fe..cdd76ee693f 100644 --- a/Doc/deprecations/pending-removal-in-3.16.rst +++ b/Doc/deprecations/pending-removal-in-3.16.rst @@ -32,7 +32,6 @@ Pending removal in Python 3.16 * :class:`asyncio.WindowsProactorEventLoopPolicy` * :func:`asyncio.get_event_loop_policy` * :func:`asyncio.set_event_loop_policy` - * :func:`asyncio.set_event_loop` Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with *loop_factory* to use the desired event loop implementation. @@ -62,6 +61,20 @@ Pending removal in Python 3.16 * Calling the Python implementation of :func:`functools.reduce` with *function* or *sequence* as keyword arguments has been deprecated since Python 3.14. +* :mod:`logging`: + + Support for custom logging handlers with the *strm* argument is deprecated + and scheduled for removal in Python 3.16. Define handlers with the *stream* + argument instead. (Contributed by Mariusz Felisiak in :gh:`115032`.) + +* :mod:`mimetypes`: + + * Valid extensions start with a '.' or are empty for + :meth:`mimetypes.MimeTypes.add_type`. + Undotted extensions are deprecated and will + raise a :exc:`ValueError` in Python 3.16. + (Contributed by Hugo van Kemenade in :gh:`75223`.) 
+ * :mod:`shutil`: * The :class:`!ExecError` exception diff --git a/Doc/deprecations/pending-removal-in-3.17.rst b/Doc/deprecations/pending-removal-in-3.17.rst new file mode 100644 index 00000000000..370b98307e5 --- /dev/null +++ b/Doc/deprecations/pending-removal-in-3.17.rst @@ -0,0 +1,10 @@ +Pending removal in Python 3.17 +------------------------------ + +* :mod:`typing`: + + - Before Python 3.14, old-style unions were implemented using the private class + ``typing._UnionGenericAlias``. This class is no longer needed for the implementation, + but it has been retained for backward compatibility, with removal scheduled for Python + 3.17. Users should use documented introspection helpers like :func:`typing.get_origin` + and :func:`typing.get_args` instead of relying on private implementation details. diff --git a/Doc/deprecations/pending-removal-in-3.19.rst b/Doc/deprecations/pending-removal-in-3.19.rst new file mode 100644 index 00000000000..3936f63ca5b --- /dev/null +++ b/Doc/deprecations/pending-removal-in-3.19.rst @@ -0,0 +1,8 @@ +Pending removal in Python 3.19 +------------------------------ + +* :mod:`ctypes`: + + * Implicitly switching to the MSVC-compatible struct layout by setting + :attr:`~ctypes.Structure._pack_` but not :attr:`~ctypes.Structure._layout_` + on non-Windows platforms. diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index 30d7d579705..4c4a368baca 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -13,8 +13,6 @@ although there is currently no date scheduled for their removal. deprecated. * The :class:`argparse.FileType` type converter is deprecated. -* :mod:`array`'s ``'u'`` format code (:gh:`57281`) - * :mod:`builtins`: * ``bool(NotImplemented)``. @@ -49,6 +47,8 @@ although there is currently no date scheduled for their removal. :data:`calendar.FEBRUARY`. (Contributed by Prince Roshan in :gh:`103636`.) 
+* :mod:`codecs`: use :func:`open` instead of :func:`codecs.open`. (:gh:`133038`) + * :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method instead. @@ -153,3 +153,6 @@ although there is currently no date scheduled for their removal. :class:`~xml.etree.ElementTree.Element` is deprecated. In a future release it will always return ``True``. Prefer explicit ``len(elem)`` or ``elem is not None`` tests instead. + +* :func:`sys._clear_type_cache` is deprecated: + use :func:`sys._clear_internal_caches` instead. diff --git a/Doc/extending/newtypes.rst b/Doc/extending/newtypes.rst index 7f57a3a6aac..e3612f3a187 100644 --- a/Doc/extending/newtypes.rst +++ b/Doc/extending/newtypes.rst @@ -70,22 +70,24 @@ object itself needs to be freed here as well. Here is an example of this function:: static void - newdatatype_dealloc(newdatatypeobject *obj) + newdatatype_dealloc(PyObject *op) { - free(obj->obj_UnderlyingDatatypePtr); - Py_TYPE(obj)->tp_free((PyObject *)obj); + newdatatypeobject *self = (newdatatypeobject *) op; + free(self->obj_UnderlyingDatatypePtr); + Py_TYPE(self)->tp_free(self); } If your type supports garbage collection, the destructor should call :c:func:`PyObject_GC_UnTrack` before clearing any member fields:: static void - newdatatype_dealloc(newdatatypeobject *obj) + newdatatype_dealloc(PyObject *op) { - PyObject_GC_UnTrack(obj); - Py_CLEAR(obj->other_obj); + newdatatypeobject *self = (newdatatypeobject *) op; + PyObject_GC_UnTrack(op); + Py_CLEAR(self->other_obj); ... - Py_TYPE(obj)->tp_free((PyObject *)obj); + Py_TYPE(self)->tp_free(self); } .. index:: @@ -117,17 +119,19 @@ done. 
This can be done using the :c:func:`PyErr_Fetch` and PyErr_Fetch(&err_type, &err_value, &err_traceback); cbresult = PyObject_CallNoArgs(self->my_callback); - if (cbresult == NULL) - PyErr_WriteUnraisable(self->my_callback); - else + if (cbresult == NULL) { + PyErr_WriteUnraisable(self->my_callback); + } + else { Py_DECREF(cbresult); + } /* This restores the saved exception state */ PyErr_Restore(err_type, err_value, err_traceback); Py_DECREF(self->my_callback); } - Py_TYPE(obj)->tp_free((PyObject*)self); + Py_TYPE(self)->tp_free(self); } .. note:: @@ -168,10 +172,11 @@ representation of the instance for which it is called. Here is a simple example:: static PyObject * - newdatatype_repr(newdatatypeobject *obj) + newdatatype_repr(PyObject *op) { + newdatatypeobject *self = (newdatatypeobject *) op; return PyUnicode_FromFormat("Repr-ified_newdatatype{{size:%d}}", - obj->obj_UnderlyingDatatypePtr->size); + self->obj_UnderlyingDatatypePtr->size); } If no :c:member:`~PyTypeObject.tp_repr` handler is specified, the interpreter will supply a @@ -188,10 +193,11 @@ used instead. Here is a simple example:: static PyObject * - newdatatype_str(newdatatypeobject *obj) + newdatatype_str(PyObject *op) { + newdatatypeobject *self = (newdatatypeobject *) op; return PyUnicode_FromFormat("Stringified_newdatatype{{size:%d}}", - obj->obj_UnderlyingDatatypePtr->size); + self->obj_UnderlyingDatatypePtr->size); } @@ -329,16 +335,16 @@ method of a class would be called. 
Here is an example:: static PyObject * - newdatatype_getattr(newdatatypeobject *obj, char *name) + newdatatype_getattr(PyObject *op, char *name) { - if (strcmp(name, "data") == 0) - { - return PyLong_FromLong(obj->data); + newdatatypeobject *self = (newdatatypeobject *) op; + if (strcmp(name, "data") == 0) { + return PyLong_FromLong(self->data); } PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.400s'", - Py_TYPE(obj)->tp_name, name); + Py_TYPE(self)->tp_name, name); return NULL; } @@ -349,7 +355,7 @@ example that simply raises an exception; if this were really all you wanted, the :c:member:`~PyTypeObject.tp_setattr` handler should be set to ``NULL``. :: static int - newdatatype_setattr(newdatatypeobject *obj, char *name, PyObject *v) + newdatatype_setattr(PyObject *op, char *name, PyObject *v) { PyErr_Format(PyExc_RuntimeError, "Read-only attribute: %s", name); return -1; @@ -379,8 +385,10 @@ Here is a sample implementation, for a datatype that is considered equal if the size of an internal pointer is equal:: static PyObject * - newdatatype_richcmp(newdatatypeobject *obj1, newdatatypeobject *obj2, int op) + newdatatype_richcmp(PyObject *lhs, PyObject *rhs, int op) { + newdatatypeobject *obj1 = (newdatatypeobject *) lhs; + newdatatypeobject *obj2 = (newdatatypeobject *) rhs; PyObject *result; int c, size1, size2; @@ -399,8 +407,7 @@ size of an internal pointer is equal:: case Py_GE: c = size1 >= size2; break; } result = c ? Py_True : Py_False; - Py_INCREF(result); - return result; + return Py_NewRef(result); } @@ -439,12 +446,14 @@ This function, if you choose to provide it, should return a hash number for an instance of your data type. 
Here is a simple example:: static Py_hash_t - newdatatype_hash(newdatatypeobject *obj) + newdatatype_hash(PyObject *op) { + newdatatypeobject *self = (newdatatypeobject *) op; Py_hash_t result; - result = obj->some_size + 32767 * obj->some_number; - if (result == -1) - result = -2; + result = self->some_size + 32767 * self->some_number; + if (result == -1) { + result = -2; + } return result; } @@ -478,8 +487,9 @@ This function takes three arguments: Here is a toy ``tp_call`` implementation:: static PyObject * - newdatatype_call(newdatatypeobject *obj, PyObject *args, PyObject *kwds) + newdatatype_call(PyObject *op, PyObject *args, PyObject *kwds) { + newdatatypeobject *self = (newdatatypeobject *) op; PyObject *result; const char *arg1; const char *arg2; @@ -490,7 +500,7 @@ Here is a toy ``tp_call`` implementation:: } result = PyUnicode_FromFormat( "Returning -- value: [%d] arg1: [%s] arg2: [%s] arg3: [%s]\n", - obj->obj_UnderlyingDatatypePtr->size, + self->obj_UnderlyingDatatypePtr->size, arg1, arg2, arg3); return result; } @@ -563,12 +573,12 @@ The only further addition is that ``tp_dealloc`` needs to clear any weak references (by calling :c:func:`PyObject_ClearWeakRefs`):: static void - Trivial_dealloc(TrivialObject *self) + Trivial_dealloc(PyObject *op) { /* Clear weakrefs first before calling any destructors */ - PyObject_ClearWeakRefs((PyObject *) self); + PyObject_ClearWeakRefs(op); /* ... remainder of destruction code omitted for brevity ... */ - Py_TYPE(self)->tp_free((PyObject *) self); + Py_TYPE(op)->tp_free(op); } diff --git a/Doc/faq/design.rst b/Doc/faq/design.rst index e2710fab9cf..c758c019ca4 100644 --- a/Doc/faq/design.rst +++ b/Doc/faq/design.rst @@ -420,10 +420,12 @@ strings representing the files in the current directory. Functions which operate on this output would generally not break if you added another file or two to the directory. 
-Tuples are immutable, meaning that once a tuple has been created, you can't -replace any of its elements with a new value. Lists are mutable, meaning that -you can always change a list's elements. Only immutable elements can be used as -dictionary keys, and hence only tuples and not lists can be used as keys. +Tuples are :term:`immutable`, meaning that once a tuple has been created, you can't +replace any of its elements with a new value. Lists are :term:`mutable`, meaning that +you can always change a list's elements. Only :term:`hashable` objects can +be used as dictionary keys. Most immutable types are hashable, which is why +tuples, but not lists, can be used as keys. Note, however, that a tuple is +only hashable if all of its elements are hashable. How are lists implemented in CPython? diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 0b26e18efd7..c5c7994f126 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -355,6 +355,12 @@ Glossary tasks (see :mod:`asyncio`) associate each task with a context which becomes the current context whenever the task starts or resumes execution. + cyclic isolate + A subgroup of one or more objects that reference each other in a reference + cycle, but are not referenced by objects outside the group. The goal of + the :term:`cyclic garbage collector <garbage collection>` is to identify these groups and break the reference + cycles so that the memory can be reclaimed. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for diff --git a/Doc/howto/annotations.rst b/Doc/howto/annotations.rst index 78f3704ba5d..d7deb6c6bc1 100644 --- a/Doc/howto/annotations.rst +++ b/Doc/howto/annotations.rst @@ -248,4 +248,9 @@ quirks by using :func:`annotationlib.get_annotations` on Python 3.14+ or :func:`inspect.get_annotations` on Python 3.10+.
On earlier versions of Python, you can avoid these bugs by accessing the annotations from the class's :attr:`~type.__dict__` -(e.g., ``cls.__dict__.get('__annotations__', None)``). +(for example, ``cls.__dict__.get('__annotations__', None)``). + +In some versions of Python, instances of classes may have an ``__annotations__`` +attribute. However, this is not supported functionality. If you need the +annotations of an instance, you can use :func:`type` to access its class +(for example, ``annotationlib.get_annotations(type(myinstance))`` on Python 3.14+). diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 95f214179bf..175bb5dc831 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -23,6 +23,14 @@ You can use it to enable code that only runs under the free-threaded build:: /* code that only runs in the free-threaded build */ #endif +.. note:: + + On Windows, this macro is not defined automatically, but must be specified + to the compiler when building. The :func:`sysconfig.get_config_var` function + can be used to determine whether the current running interpreter had the + macro defined. + + Module Initialization ===================== @@ -243,6 +251,141 @@ depend on your extension, but some common patterns include: `thread-local storage `_. +Critical Sections +================= + +.. _critical-sections: + +In the free-threaded build, CPython provides a mechanism called "critical +sections" to protect data that would otherwise be protected by the GIL. +While extension authors may not interact with the internal critical section +implementation directly, understanding their behavior is crucial when using +certain C API functions or managing shared state in the free-threaded build. + +What Are Critical Sections? +........................... + +Conceptually, critical sections act as a deadlock avoidance layer built on +top of simple mutexes. 
Each thread maintains a stack of active critical +sections. When a thread needs to acquire a lock associated with a critical +section (e.g., implicitly when calling a thread-safe C API function like +:c:func:`PyDict_SetItem`, or explicitly using macros), it attempts to acquire +the underlying mutex. + +Using Critical Sections +....................... + +The primary APIs for using critical sections are: + +* :c:macro:`Py_BEGIN_CRITICAL_SECTION` and :c:macro:`Py_END_CRITICAL_SECTION` - + For locking a single object + +* :c:macro:`Py_BEGIN_CRITICAL_SECTION2` and :c:macro:`Py_END_CRITICAL_SECTION2` + - For locking two objects simultaneously + +These macros must be used in matching pairs and must appear in the same C +scope, since they establish a new local scope. These macros are no-ops in +non-free-threaded builds, so they can be safely added to code that needs to +support both build types. + +A common use of a critical section would be to lock an object while accessing +an internal attribute of it. For example, if an extension type has an internal +count field, you could use a critical section while reading or writing that +field:: + + // read the count, returns new reference to internal count value + PyObject *result; + Py_BEGIN_CRITICAL_SECTION(obj); + result = Py_NewRef(obj->count); + Py_END_CRITICAL_SECTION(); + return result; + + // write the count, consumes reference from new_count + Py_BEGIN_CRITICAL_SECTION(obj); + obj->count = new_count; + Py_END_CRITICAL_SECTION(); + + +How Critical Sections Work +.......................... + +Unlike traditional locks, critical sections do not guarantee exclusive access +throughout their entire duration. If a thread would block while holding a +critical section (e.g., by acquiring another lock or performing I/O), the +critical section is temporarily suspended—all locks are released—and then +resumed when the blocking operation completes. 
+ +This behavior is similar to what happens with the GIL when a thread makes a +blocking call. The key differences are: + +* Critical sections operate on a per-object basis rather than globally + +* Critical sections follow a stack discipline within each thread (the "begin" and + "end" macros enforce this since they must be paired and within the same scope) + +* Critical sections automatically release and reacquire locks around potential + blocking operations + +Deadlock Avoidance +.................. + +Critical sections help avoid deadlocks in two ways: + +1. If a thread tries to acquire a lock that's already held by another thread, + it first suspends all of its active critical sections, temporarily releasing + their locks + +2. When the blocking operation completes, only the top-most critical section is + reacquired first + +This means you cannot rely on nested critical sections to lock multiple objects +at once, as the inner critical section may suspend the outer ones. Instead, use +:c:macro:`Py_BEGIN_CRITICAL_SECTION2` to lock two objects simultaneously. + +Note that the locks described above are only :c:type:`!PyMutex` based locks. +The critical section implementation does not know about or affect other locking +mechanisms that might be in use, like POSIX mutexes. Also note that while +blocking on any :c:type:`!PyMutex` causes the critical sections to be +suspended, only the mutexes that are part of the critical sections are +released. If :c:type:`!PyMutex` is used without a critical section, it will +not be released and therefore does not get the same deadlock avoidance. + +Important Considerations +........................ + +* Critical sections may temporarily release their locks, allowing other threads + to modify the protected data. Be careful about making assumptions about the + state of the data after operations that might block. 
+ +* Because locks can be temporarily released (suspended), entering a critical + section does not guarantee exclusive access to the protected resource + throughout the section's duration. If code within a critical section calls + another function that blocks (e.g., acquires another lock, performs blocking + I/O), all locks held by the thread via critical sections will be released. + This is similar to how the GIL can be released during blocking calls. + +* Only the lock(s) associated with the most recently entered (top-most) + critical section are guaranteed to be held at any given time. Locks for + outer, nested critical sections might have been suspended. + +* You can lock at most two objects simultaneously with these APIs. If you need + to lock more objects, you'll need to restructure your code. + +* While critical sections will not deadlock if you attempt to lock the same + object twice, they are less efficient than purpose-built reentrant locks for + this use case. + +* When using :c:macro:`Py_BEGIN_CRITICAL_SECTION2`, the order of the objects + doesn't affect correctness (the implementation handles deadlock avoidance), + but it's good practice to always lock objects in a consistent order. + +* Remember that the critical section macros are primarily for protecting access + to *Python objects* that might be involved in internal CPython operations + susceptible to the deadlock scenarios described above. For protecting purely + internal extension state, standard mutexes or other synchronization + primitives might be more appropriate. + + Building Extensions for the Free-Threaded Build =============================================== @@ -253,7 +396,7 @@ The wheels, shared libraries, and binaries are indicated by a ``t`` suffix. free-threaded build, with the ``t`` suffix, such as ``python3.13t``. * `pypa/cibuildwheel `_ supports the free-threaded build if you set - `CIBW_FREE_THREADED_SUPPORT `_. + `CIBW_ENABLE to cpython-freethreading `_. 
Limited C API and Stable ABI ............................ diff --git a/Doc/howto/free-threading-python.rst b/Doc/howto/free-threading-python.rst index f7a894ac2cd..c33cef2c8e9 100644 --- a/Doc/howto/free-threading-python.rst +++ b/Doc/howto/free-threading-python.rst @@ -32,7 +32,7 @@ optionally support installing free-threaded Python binaries. The installers are available at https://www.python.org/downloads/. For information on other platforms, see the `Installing a Free-Threaded Python -`_, a +`_, a community-maintained installation guide for installing free-threaded Python. When building CPython from source, the :option:`--disable-gil` configure option diff --git a/Doc/howto/functional.rst b/Doc/howto/functional.rst index 1f0608fb0fc..b4f3463afee 100644 --- a/Doc/howto/functional.rst +++ b/Doc/howto/functional.rst @@ -372,8 +372,8 @@ have the form:: for expr2 in sequence2 if condition2 for expr3 in sequence3 - ... if condition3 + ... for exprN in sequenceN if conditionN ) diff --git a/Doc/howto/index.rst b/Doc/howto/index.rst index c09f92c9528..f350141004c 100644 --- a/Doc/howto/index.rst +++ b/Doc/howto/index.rst @@ -34,6 +34,7 @@ Python Library Reference. mro.rst free-threading-python.rst free-threading-extensions.rst + remote_debugging.rst General: @@ -66,3 +67,4 @@ Debugging and profiling: * :ref:`gdb` * :ref:`instrumentation` * :ref:`perf_profiling` +* :ref:`remote-debugging` diff --git a/Doc/howto/isolating-extensions.rst b/Doc/howto/isolating-extensions.rst index a636e06bda8..5513cd73675 100644 --- a/Doc/howto/isolating-extensions.rst +++ b/Doc/howto/isolating-extensions.rst @@ -215,21 +215,36 @@ multiple interpreters correctly. If this is not yet the case for your module, you can explicitly make your module loadable only once per process. 
For example:: + // A process-wide flag static int loaded = 0; + // Mutex to provide thread safety (only needed for free-threaded Python) + static PyMutex modinit_mutex = {0}; + static int exec_module(PyObject* module) { + PyMutex_Lock(&modinit_mutex); if (loaded) { + PyMutex_Unlock(&modinit_mutex); PyErr_SetString(PyExc_ImportError, "cannot load module more than once per process"); return -1; } loaded = 1; + PyMutex_Unlock(&modinit_mutex); // ... rest of initialization } +If your module's :c:member:`PyModuleDef.m_clear` function is able to prepare +for future re-initialization, it should clear the ``loaded`` flag. +In this case, your module won't support multiple instances existing +*concurrently*, but it will, for example, support being loaded after +Python runtime shutdown (:c:func:`Py_FinalizeEx`) and re-initialization +(:c:func:`Py_Initialize`). + + Module State Access from Functions ---------------------------------- diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index f08f4517998..7d64a02358a 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -626,6 +626,19 @@ which, when run, will produce: of each message with the handler's level, and only passes a message to a handler if it's appropriate to do so. +.. versionchanged:: 3.14 + The :class:`QueueListener` can be started (and stopped) via the + :keyword:`with` statement. For example: + + .. code-block:: python + + with QueueListener(que, handler) as listener: + # The queue listener automatically starts + # when the 'with' block is entered. + pass + # The queue listener automatically stops once + # the 'with' block is exited. + .. 
_network-logging: Sending and receiving logging events across a network diff --git a/Doc/howto/perf_profiling.rst b/Doc/howto/perf_profiling.rst index 4cec8f62aeb..b579d776576 100644 --- a/Doc/howto/perf_profiling.rst +++ b/Doc/howto/perf_profiling.rst @@ -254,13 +254,28 @@ files in the current directory which are ELF images for all the JIT trampolines that were created by Python. .. warning:: - Notice that when using ``--call-graph dwarf`` the ``perf`` tool will take + When using ``--call-graph dwarf``, the ``perf`` tool will take snapshots of the stack of the process being profiled and save the - information in the ``perf.data`` file. By default the size of the stack dump - is 8192 bytes but the user can change the size by passing the size after - comma like ``--call-graph dwarf,4096``. The size of the stack dump is - important because if the size is too small ``perf`` will not be able to - unwind the stack and the output will be incomplete. On the other hand, if - the size is too big, then ``perf`` won't be able to sample the process as - frequently as it would like as the overhead will be higher. + information in the ``perf.data`` file. By default, the size of the stack dump + is 8192 bytes, but you can change the size by passing it after + a comma like ``--call-graph dwarf,16384``. + The size of the stack dump is important because if the size is too small + ``perf`` will not be able to unwind the stack and the output will be + incomplete. On the other hand, if the size is too big, then ``perf`` won't + be able to sample the process as frequently as it would like as the overhead + will be higher. + + The stack size is particularly important when profiling Python code compiled + with low optimization levels (like ``-O0``), as these builds tend to have + larger stack frames. 
If you are compiling Python with ``-O0`` and not seeing + Python functions in your profiling output, try increasing the stack dump + size to 65528 bytes (the maximum):: + + $ perf record -F 9999 -g -k 1 --call-graph dwarf,65528 -o perf.data python -Xperf_jit my_script.py + + Different compilation flags can significantly impact stack sizes: + + - Builds with ``-O0`` typically have much larger stack frames than those with ``-O1`` or higher + - Adding optimizations (``-O1``, ``-O2``, etc.) typically reduces stack size + - Frame pointers (``-fno-omit-frame-pointer``) generally provide more reliable stack unwinding diff --git a/Doc/howto/regex.rst b/Doc/howto/regex.rst index 5e2f9a9d183..e543f6d5657 100644 --- a/Doc/howto/regex.rst +++ b/Doc/howto/regex.rst @@ -738,9 +738,12 @@ given location, they can obviously be matched an infinite number of times. different: ``\A`` still matches only at the beginning of the string, but ``^`` may match at any location inside the string that follows a newline character. -``\Z`` +``\z`` Matches only at the end of the string. +``\Z`` + The same as ``\z``. For compatibility with old Python versions. + ``\b`` Word boundary. This is a zero-width assertion that matches only at the beginning or end of a word. A word is defined as a sequence of alphanumeric diff --git a/Doc/howto/remote_debugging.rst b/Doc/howto/remote_debugging.rst new file mode 100644 index 00000000000..3adb6ad03e5 --- /dev/null +++ b/Doc/howto/remote_debugging.rst @@ -0,0 +1,545 @@ +.. _remote-debugging: + +Remote debugging attachment protocol +==================================== + +This section describes the low-level protocol that enables external tools to +inject and execute a Python script within a running CPython process. + +This mechanism forms the basis of the :func:`sys.remote_exec` function, which +instructs a remote Python process to execute a ``.py`` file. However, this +section does not document the usage of that function. 
Instead, it provides a +detailed explanation of the underlying protocol, which takes as input the +``pid`` of a target Python process and the path to a Python source file to be +executed. This information supports independent reimplementation of the +protocol, regardless of programming language. + +.. warning:: + + The execution of the injected script depends on the interpreter reaching a + safe evaluation point. As a result, execution may be delayed depending on + the runtime state of the target process. + +Once injected, the script is executed by the interpreter within the target +process the next time a safe evaluation point is reached. This approach enables +remote execution capabilities without modifying the behavior or structure of +the running Python application. + +Subsequent sections provide a step-by-step description of the protocol, +including techniques for locating interpreter structures in memory, safely +accessing internal fields, and triggering code execution. Platform-specific +variations are noted where applicable, and example implementations are included +to clarify each operation. + +Locating the PyRuntime structure +================================ + +CPython places the ``PyRuntime`` structure in a dedicated binary section to +help external tools find it at runtime. The name and format of this section +vary by platform. For example, ``.PyRuntime`` is used on ELF systems, and +``__DATA,__PyRuntime`` is used on macOS. Tools can find the offset of this +structure by examining the binary on disk. + +The ``PyRuntime`` structure contains CPython’s global interpreter state and +provides access to other internal data, including the list of interpreters, +thread states, and debugger support fields. + +To work with a remote Python process, a debugger must first find the memory +address of the ``PyRuntime`` structure in the target process. 
This address +can’t be hardcoded or calculated from a symbol name, because it depends on +where the operating system loaded the binary. + +The method for finding ``PyRuntime`` depends on the platform, but the steps are +the same in general: + +1. Find the base address where the Python binary or shared library was loaded + in the target process. +2. Use the on-disk binary to locate the offset of the ``.PyRuntime`` section. +3. Add the section offset to the base address to compute the address in memory. + +The sections below explain how to do this on each supported platform and +include example code. + +.. rubric:: Linux (ELF) + +To find the ``PyRuntime`` structure on Linux: + +1. Read the process’s memory map (for example, ``/proc/<pid>/maps``) to find + the address where the Python executable or ``libpython`` was loaded. +2. Parse the ELF section headers in the binary to get the offset of the + ``.PyRuntime`` section. +3. Add that offset to the base address from step 1 to get the memory address of + ``PyRuntime``. + +The following is an example implementation:: + + def find_py_runtime_linux(pid: int) -> int: + # Step 1: Try to find the Python executable in memory + binary_path, base_address = find_mapped_binary( + pid, name_contains="python" + ) + + # Step 2: Fallback to shared library if executable is not found + if binary_path is None: + binary_path, base_address = find_mapped_binary( + pid, name_contains="libpython" + ) + + # Step 3: Parse ELF headers to get .PyRuntime section offset + section_offset = parse_elf_section_offset( + binary_path, ".PyRuntime" + ) + + # Step 4: Compute PyRuntime address in memory + return base_address + section_offset + + +On Linux systems, there are two main approaches to read memory from another +process. The first is through the ``/proc`` filesystem, specifically by reading from +``/proc/[pid]/mem`` which provides direct access to the process's memory.
This +requires appropriate permissions - either being the same user as the target +process or having root access. The second approach is using the +``process_vm_readv()`` system call which provides a more efficient way to copy +memory between processes. While ptrace's ``PTRACE_PEEKTEXT`` operation can also be +used to read memory, it is significantly slower as it only reads one word at a +time and requires multiple context switches between the tracer and tracee +processes. + +For parsing ELF sections, the process involves reading and interpreting the ELF +file format structures from the binary file on disk. The ELF header contains a +pointer to the section header table. Each section header contains metadata about +a section including its name (stored in a separate string table), offset, and +size. To find a specific section like .PyRuntime, you need to walk through these +headers and match the section name. The section header then provides the offset +where that section exists in the file, which can be used to calculate its +runtime address when the binary is loaded into memory. + +You can read more about the ELF file format in the `ELF specification +`_. + + +.. rubric:: macOS (Mach-O) + +To find the ``PyRuntime`` structure on macOS: + +1. Call ``task_for_pid()`` to get the ``mach_port_t`` task port for the target + process. This handle is needed to read memory using APIs like + ``mach_vm_read_overwrite`` and ``mach_vm_region``. +2. Scan the memory regions to find the one containing the Python executable or + ``libpython``. +3. Load the binary file from disk and parse the Mach-O headers to find the + section named ``PyRuntime`` in the ``__DATA`` segment. On macOS, symbol + names are automatically prefixed with an underscore, so the ``PyRuntime`` + symbol appears as ``_PyRuntime`` in the symbol table, but the section name + is not affected. 
+ +The following is an example implementation:: + + def find_py_runtime_macos(pid: int) -> int: + # Step 1: Get access to the process's memory + handle = get_memory_access_handle(pid) + + # Step 2: Try to find the Python executable in memory + binary_path, base_address = find_mapped_binary( + handle, name_contains="python" + ) + + # Step 3: Fallback to libpython if the executable is not found + if binary_path is None: + binary_path, base_address = find_mapped_binary( + handle, name_contains="libpython" + ) + + # Step 4: Parse Mach-O headers to get __DATA,__PyRuntime section offset + section_offset = parse_macho_section_offset( + binary_path, "__DATA", "__PyRuntime" + ) + + # Step 5: Compute the PyRuntime address in memory + return base_address + section_offset + +On macOS, accessing another process's memory requires using Mach-O specific APIs +and file formats. The first step is obtaining a ``task_port`` handle via +``task_for_pid()``, which provides access to the target process's memory space. +This handle enables memory operations through APIs like +``mach_vm_read_overwrite()``. + +The process memory can be examined using ``mach_vm_region()`` to scan through the +virtual memory space, while ``proc_regionfilename()`` helps identify which binary +files are loaded at each memory region. When the Python binary or library is +found, its Mach-O headers need to be parsed to locate the ``PyRuntime`` structure. + +The Mach-O format organizes code and data into segments and sections. The +``PyRuntime`` structure lives in a section named ``__PyRuntime`` within the +``__DATA`` segment. The actual runtime address calculation involves finding the +``__TEXT`` segment which serves as the binary's base address, then locating the +``__DATA`` segment containing our target section. The final address is computed by +combining the base address with the appropriate section offsets from the Mach-O +headers. 
+ +Note that accessing another process's memory on macOS typically requires +elevated privileges - either root access or special security entitlements +granted to the debugging process. + + +.. rubric:: Windows (PE) + +To find the ``PyRuntime`` structure on Windows: + +1. Use the ToolHelp API to enumerate all modules loaded in the target process. + This is done using functions such as `CreateToolhelp32Snapshot + `_, + `Module32First + `_, + and `Module32Next + `_. +2. Identify the module corresponding to :file:`python.exe` or + :file:`python{XY}.dll`, where ``X`` and ``Y`` are the major and minor + version numbers of the Python version, and record its base address. +3. Locate the ``PyRuntim`` section. Due to the PE format's 8-character limit + on section names (defined as ``IMAGE_SIZEOF_SHORT_NAME``), the original + name ``PyRuntime`` is truncated. This section contains the ``PyRuntime`` + structure. +4. Retrieve the section’s relative virtual address (RVA) and add it to the base + address of the module. + +The following is an example implementation:: + + def find_py_runtime_windows(pid: int) -> int: + # Step 1: Try to find the Python executable in memory + binary_path, base_address = find_loaded_module( + pid, name_contains="python" + ) + + # Step 2: Fallback to shared pythonXY.dll if the executable is not + # found + if binary_path is None: + binary_path, base_address = find_loaded_module( + pid, name_contains="python3" + ) + + # Step 3: Parse PE section headers to get the RVA of the PyRuntime + # section. The section name appears as "PyRuntim" due to the + # 8-character limit defined by the PE format (IMAGE_SIZEOF_SHORT_NAME). 
+ section_rva = parse_pe_section_offset(binary_path, "PyRuntim") + + # Step 4: Compute PyRuntime address in memory + return base_address + section_rva + + +On Windows, accessing another process's memory requires using the Windows API +functions like ``CreateToolhelp32Snapshot()`` and ``Module32First()/Module32Next()`` +to enumerate loaded modules. The ``OpenProcess()`` function provides a handle to +access the target process's memory space, enabling memory operations through +``ReadProcessMemory()``. + +The process memory can be examined by enumerating loaded modules to find the +Python binary or DLL. When found, its PE headers need to be parsed to locate the +``PyRuntime`` structure. + +The PE format organizes code and data into sections. The ``PyRuntime`` structure +lives in a section named "PyRuntim" (truncated from "PyRuntime" due to PE's +8-character name limit). The actual runtime address calculation involves finding +the module's base address from the module entry, then locating our target +section in the PE headers. The final address is computed by combining the base +address with the section's virtual address from the PE section headers. + +Note that accessing another process's memory on Windows typically requires +appropriate privileges - either administrative access or the ``SeDebugPrivilege`` +privilege granted to the debugging process. + + +Reading _Py_DebugOffsets +======================== + +Once the address of the ``PyRuntime`` structure has been determined, the next +step is to read the ``_Py_DebugOffsets`` structure located at the beginning of +the ``PyRuntime`` block. + +This structure provides version-specific field offsets that are needed to +safely read interpreter and thread state memory. These offsets vary between +CPython versions and must be checked before use to ensure they are compatible. + +To read and check the debug offsets, follow these steps: + +1. 
Read memory from the target process starting at the ``PyRuntime`` address, + covering the same number of bytes as the ``_Py_DebugOffsets`` structure. + This structure is located at the very start of the ``PyRuntime`` memory + block. Its layout is defined in CPython’s internal headers and stays the + same within a given minor version, but may change in major versions. + +2. Check that the structure contains valid data: + + - The ``cookie`` field must match the expected debug marker. + - The ``version`` field must match the version of the Python interpreter + used by the debugger. + - If either the debugger or the target process is using a pre-release + version (for example, an alpha, beta, or release candidate), the versions + must match exactly. + - The ``free_threaded`` field must have the same value in both the debugger + and the target process. + +3. If the structure is valid, the offsets it contains can be used to locate + fields in memory. If any check fails, the debugger should stop the operation + to avoid reading memory in the wrong format. + +The following is an example implementation that reads and checks +``_Py_DebugOffsets``:: + + def read_debug_offsets(pid: int, py_runtime_addr: int) -> DebugOffsets: + # Step 1: Read memory from the target process at the PyRuntime address + data = read_process_memory( + pid, address=py_runtime_addr, size=DEBUG_OFFSETS_SIZE + ) + + # Step 2: Deserialize the raw bytes into a _Py_DebugOffsets structure + debug_offsets = parse_debug_offsets(data) + + # Step 3: Validate the contents of the structure + if debug_offsets.cookie != EXPECTED_COOKIE: + raise RuntimeError("Invalid or missing debug cookie") + if debug_offsets.version != LOCAL_PYTHON_VERSION: + raise RuntimeError( + "Mismatch between caller and target Python versions" + ) + if debug_offsets.free_threaded != LOCAL_FREE_THREADED: + raise RuntimeError("Mismatch in free-threaded configuration") + + return debug_offsets + + + +.. 
warning:: + + **Process suspension recommended** + + To avoid race conditions and ensure memory consistency, it is strongly + recommended that the target process be suspended before performing any + operations that read or write internal interpreter state. The Python runtime + may concurrently mutate interpreter data structures—such as creating or + destroying threads—during normal execution. This can result in invalid + memory reads or writes. + + A debugger may suspend execution by attaching to the process with ``ptrace`` + or by sending a ``SIGSTOP`` signal. Execution should only be resumed after + debugger-side memory operations are complete. + + .. note:: + + Some tools, such as profilers or sampling-based debuggers, may operate on + a running process without suspension. In such cases, tools must be + explicitly designed to handle partially updated or inconsistent memory. + For most debugger implementations, suspending the process remains the + safest and most robust approach. + + +Locating the interpreter and thread state +========================================= + +Before code can be injected and executed in a remote Python process, the +debugger must choose a thread in which to schedule execution. This is necessary +because the control fields used to perform remote code injection are located in +the ``_PyRemoteDebuggerSupport`` structure, which is embedded in a +``PyThreadState`` object. These fields are modified by the debugger to request +execution of injected scripts. + +The ``PyThreadState`` structure represents a thread running inside a Python +interpreter. It maintains the thread’s evaluation context and contains the +fields required for debugger coordination. Locating a valid ``PyThreadState`` +is therefore a key prerequisite for triggering execution remotely. + +A thread is typically selected based on its role or ID. In most cases, the main +thread is used, but some tools may target a specific thread by its native +thread ID. 
Once the target thread is chosen, the debugger must locate both the +interpreter and the associated thread state structures in memory. + +The relevant internal structures are defined as follows: + +- ``PyInterpreterState`` represents an isolated Python interpreter instance. + Each interpreter maintains its own set of imported modules, built-in state, + and thread state list. Although most Python applications use a single + interpreter, CPython supports multiple interpreters in the same process. + +- ``PyThreadState`` represents a thread running within an interpreter. It + contains execution state and the control fields used by the debugger. + +To locate a thread: + +1. Use the offset ``runtime_state.interpreters_head`` to obtain the address of + the first interpreter in the ``PyRuntime`` structure. This is the entry point + to the linked list of active interpreters. + +2. Use the offset ``interpreter_state.threads_main`` to access the main thread + state associated with the selected interpreter. This is typically the most + reliable thread to target. + +3. Optionally, use the offset ``interpreter_state.threads_head`` to iterate + through the linked list of all thread states. Each ``PyThreadState`` structure + contains a ``native_thread_id`` field, which may be compared to a target thread + ID to find a specific thread. + +4. Once a valid ``PyThreadState`` has been found, its address can be used in + later steps of the protocol, such as writing debugger control fields and + scheduling execution.
+ +The following is an example implementation that locates the main thread state:: + + def find_main_thread_state( + pid: int, py_runtime_addr: int, debug_offsets: DebugOffsets, + ) -> int: + # Step 1: Read interpreters_head from PyRuntime + interp_head_ptr = ( + py_runtime_addr + debug_offsets.runtime_state.interpreters_head + ) + interp_addr = read_pointer(pid, interp_head_ptr) + if interp_addr == 0: + raise RuntimeError("No interpreter found in the target process") + + # Step 2: Read the threads_main pointer from the interpreter + threads_main_ptr = ( + interp_addr + debug_offsets.interpreter_state.threads_main + ) + thread_state_addr = read_pointer(pid, threads_main_ptr) + if thread_state_addr == 0: + raise RuntimeError("Main thread state is not available") + + return thread_state_addr + +The following example demonstrates how to locate a thread by its native thread +ID:: + + def find_thread_by_id( + pid: int, + interp_addr: int, + debug_offsets: DebugOffsets, + target_tid: int, + ) -> int: + # Start at threads_head and walk the linked list + thread_ptr = read_pointer( + pid, + interp_addr + debug_offsets.interpreter_state.threads_head + ) + + while thread_ptr: + native_tid_ptr = ( + thread_ptr + debug_offsets.thread_state.native_thread_id + ) + native_tid = read_int(pid, native_tid_ptr) + if native_tid == target_tid: + return thread_ptr + thread_ptr = read_pointer( + pid, + thread_ptr + debug_offsets.thread_state.next + ) + + raise RuntimeError("Thread with the given ID was not found") + + +Once a valid thread state has been located, the debugger can proceed with +modifying its control fields and scheduling execution, as described in the next +section. + +Writing control information +=========================== + +Once a valid ``PyThreadState`` structure has been identified, the debugger may +modify control fields within it to schedule the execution of a specified Python +script. 
These control fields are checked periodically by the interpreter, and +when set correctly, they trigger the execution of remote code at a safe point +in the evaluation loop. + +Each ``PyThreadState`` contains a ``_PyRemoteDebuggerSupport`` structure used +for communication between the debugger and the interpreter. The locations of +its fields are defined by the ``_Py_DebugOffsets`` structure and include the +following: + +- ``debugger_script_path``: A fixed-size buffer that holds the full path to a + Python source file (``.py``). This file must be accessible and readable by + the target process when execution is triggered. + +- ``debugger_pending_call``: An integer flag. Setting this to ``1`` tells the + interpreter that a script is ready to be executed. + +- ``eval_breaker``: A field checked by the interpreter during execution. + Setting bit 5 (``_PY_EVAL_PLEASE_STOP_BIT``, value ``1U << 5``) in this + field causes the interpreter to pause and check for debugger activity. + +To complete the injection, the debugger must perform the following steps: + +1. Write the full script path into the ``debugger_script_path`` buffer. +2. Set ``debugger_pending_call`` to ``1``. +3. Read the current value of ``eval_breaker``, set bit 5 + (``_PY_EVAL_PLEASE_STOP_BIT``), and write the updated value back. This + signals the interpreter to check for debugger activity. 
+ +The following is an example implementation:: + + def inject_script( + pid: int, + thread_state_addr: int, + debug_offsets: DebugOffsets, + script_path: str + ) -> None: + # Compute the base offset of _PyRemoteDebuggerSupport + support_base = ( + thread_state_addr + + debug_offsets.debugger_support.remote_debugger_support + ) + + # Step 1: Write the script path into debugger_script_path + script_path_ptr = ( + support_base + + debug_offsets.debugger_support.debugger_script_path + ) + write_string(pid, script_path_ptr, script_path) + + # Step 2: Set debugger_pending_call to 1 + pending_ptr = ( + support_base + + debug_offsets.debugger_support.debugger_pending_call + ) + write_int(pid, pending_ptr, 1) + + # Step 3: Set _PY_EVAL_PLEASE_STOP_BIT (bit 5, value 1 << 5) in + # eval_breaker + eval_breaker_ptr = ( + thread_state_addr + + debug_offsets.debugger_support.eval_breaker + ) + breaker = read_int(pid, eval_breaker_ptr) + breaker |= (1 << 5) + write_int(pid, eval_breaker_ptr, breaker) + + +Once these fields are set, the debugger may resume the process (if it was +suspended). The interpreter will process the request at the next safe +evaluation point, load the script from disk, and execute it. + +It is the responsibility of the debugger to ensure that the script file remains +present and accessible to the target process during execution. + +.. note:: + + Script execution is asynchronous. The script file cannot be deleted + immediately after injection. The debugger should wait until the injected + script has produced an observable effect before removing the file. + This effect depends on what the script is designed to do. For example, + a debugger might wait until the remote process connects back to a socket + before removing the script. Once such an effect is observed, it is safe to + assume the file is no longer needed. + +Summary +======= + +To inject and execute a Python script in a remote process: + +1. 
Locate the ``PyRuntime`` structure in the target process’s memory. +2. Read and validate the ``_Py_DebugOffsets`` structure at the beginning of + ``PyRuntime``. +3. Use the offsets to locate a valid ``PyThreadState``. +4. Write the path to a Python script into ``debugger_script_path``. +5. Set the ``debugger_pending_call`` flag to ``1``. +6. Set ``_PY_EVAL_PLEASE_STOP_BIT`` in the ``eval_breaker`` field. +7. Resume the process (if suspended). The script will execute at the next safe + evaluation point. + diff --git a/Doc/library/annotationlib.rst b/Doc/library/annotationlib.rst index e07081e3c5d..41c9ce479ff 100644 --- a/Doc/library/annotationlib.rst +++ b/Doc/library/annotationlib.rst @@ -40,7 +40,7 @@ The :func:`get_annotations` function is the main entry point for retrieving annotations. Given a function, class, or module, it returns an annotations dictionary in the requested format. This module also provides functionality for working directly with the :term:`annotate function` -that is used to evaluate annotations, such as :func:`get_annotate_function` +that is used to evaluate annotations, such as :func:`get_annotate_from_class_namespace` and :func:`call_annotate_function`, as well as the :func:`call_evaluate_function` function for working with :term:`evaluate functions `. @@ -127,25 +127,8 @@ Classes Values are the result of evaluating the annotation expressions. - .. attribute:: FORWARDREF - :value: 2 - - Values are real annotation values (as per :attr:`Format.VALUE` format) - for defined values, and :class:`ForwardRef` proxies for undefined - values. Real objects may contain references to, :class:`ForwardRef` - proxy objects. - - .. attribute:: STRING - :value: 3 - - Values are the text string of the annotation as it appears in the - source code, up to modifications including, but not restricted to, - whitespace normalizations and constant values optimizations. - - The exact values of these strings may change in future versions of Python. - .. 
attribute:: VALUE_WITH_FAKE_GLOBALS - :value: 4 + :value: 2 Special value used to signal that an annotate function is being evaluated in a special environment with fake globals. When passed this @@ -155,6 +138,23 @@ Classes This format is only used internally and should not be passed to the functions in this module. + .. attribute:: FORWARDREF + :value: 3 + + Values are real annotation values (as per :attr:`Format.VALUE` format) + for defined values, and :class:`ForwardRef` proxies for undefined + values. Real objects may contain references to :class:`ForwardRef` + proxy objects. + + .. attribute:: STRING + :value: 4 + + Values are the text string of the annotation as it appears in the + source code, up to modifications including, but not restricted to, + whitespace normalizations and constant values optimizations. + + The exact values of these strings may change in future versions of Python. + .. versionadded:: 3.14 .. class:: ForwardRef @@ -172,14 +172,21 @@ Classes :class:`~ForwardRef`. The string may not be exactly equivalent to the original source. - .. method:: evaluate(*, owner=None, globals=None, locals=None, type_params=None) + .. method:: evaluate(*, owner=None, globals=None, locals=None, type_params=None, format=Format.VALUE) Evaluate the forward reference, returning its value. - This may throw an exception, such as :exc:`NameError`, if the forward + If the *format* argument is :attr:`~Format.VALUE` (the default), + this method may throw an exception, such as :exc:`NameError`, if the forward reference refers to a name that cannot be resolved. The arguments to this method can be used to provide bindings for names that would otherwise - be undefined. + be undefined. If the *format* argument is :attr:`~Format.FORWARDREF`, + the method will never throw an exception, but may return a :class:`~ForwardRef` + instance. 
For example, if the forward reference object contains the code + ``list[undefined]``, where ``undefined`` is a name that is not defined, + evaluating it with the :attr:`~Format.FORWARDREF` format will return + ``list[ForwardRef('undefined')]``. If the *format* argument is + :attr:`~Format.STRING`, the method will return :attr:`~ForwardRef.__forward_arg__`. The *owner* parameter provides the preferred mechanism for passing scope information to this method. The owner of a :class:`~ForwardRef` is the @@ -214,7 +221,7 @@ Functions Convert an annotations dict containing runtime values to a dict containing only strings. If the values are not already strings, - they are converted using :func:`value_to_string`. + they are converted using :func:`type_repr`. This is meant as a helper for user-provided annotate functions that support the :attr:`~Format.STRING` format but do not have access to the code creating the annotations. @@ -300,15 +307,13 @@ Functions .. versionadded:: 3.14 -.. function:: get_annotate_function(obj) +.. function:: get_annotate_from_class_namespace(namespace) - Retrieve the :term:`annotate function` for *obj*. Return :const:`!None` - if *obj* does not have an annotate function. - - This is usually equivalent to accessing the :attr:`~object.__annotate__` - attribute of *obj*, but direct access to the attribute may return the wrong - object in certain situations involving metaclasses. This function should be - used instead of accessing the attribute directly. + Retrieve the :term:`annotate function` from a class namespace dictionary *namespace*. + Return :const:`!None` if the namespace does not contain an annotate function. + This is primarily useful before the class has been fully created (e.g., in a metaclass); + after the class exists, the annotate function can be retrieved with ``cls.__annotate__``. + See :ref:`below <annotationlib-metaclass>` for an example using this function in a metaclass. ..
versionadded:: 3.14 @@ -317,11 +322,22 @@ Functions Compute the annotations dict for an object. *obj* may be a callable, class, module, or other object with - :attr:`~object.__annotate__` and :attr:`~object.__annotations__` attributes. - Passing in an object of any other type raises :exc:`TypeError`. + :attr:`~object.__annotate__` or :attr:`~object.__annotations__` attributes. + Passing any other object raises :exc:`TypeError`. The *format* parameter controls the format in which annotations are returned, and must be a member of the :class:`Format` enum or its integer equivalent. + The different formats work as follows: + + * VALUE: :attr:`!object.__annotations__` is tried first; if that does not exist, + the :attr:`!object.__annotate__` function is called if it exists. + * FORWARDREF: If :attr:`!object.__annotations__` exists and can be evaluated successfully, + it is used; otherwise, the :attr:`!object.__annotate__` function is called. If it + does not exist either, :attr:`!object.__annotations__` is tried again and any error + from accessing it is re-raised. + * STRING: If :attr:`!object.__annotate__` exists, it is called first; + otherwise, :attr:`!object.__annotations__` is used and stringified + using :func:`annotations_to_string`. Returns a dict. :func:`!get_annotations` returns a new dict every time it's called; calling it twice on the same object will return two @@ -382,7 +398,7 @@ Functions .. versionadded:: 3.14 -.. function:: value_to_string(value) +.. function:: type_repr(value) Convert an arbitrary Python value to a format suitable for use by the :attr:`~Format.STRING` format. This calls :func:`repr` for most @@ -396,3 +412,190 @@ Functions .. versionadded:: 3.14 + +Recipes +------- + +.. _annotationlib-metaclass: + +Using annotations in a metaclass +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A :ref:`metaclass ` may want to inspect or even modify the annotations +in a class body during class creation. 
Doing so requires retrieving annotations +from the class namespace dictionary. For classes created with +``from __future__ import annotations``, the annotations will be in the ``__annotations__`` +key of the dictionary. For other classes with annotations, +:func:`get_annotate_from_class_namespace` can be used to get the +annotate function, and :func:`call_annotate_function` can be used to call it and +retrieve the annotations. Using the :attr:`~Format.FORWARDREF` format will usually +be best, because this allows the annotations to refer to names that cannot yet be +resolved when the class is created. + +To modify the annotations, it is best to create a wrapper annotate function +that calls the original annotate function, makes any necessary adjustments, and +returns the result. + +Below is an example of a metaclass that filters out all :class:`typing.ClassVar` +annotations from the class and puts them in a separate attribute: + +.. code-block:: python + + import annotationlib + import typing + + class ClassVarSeparator(type): + def __new__(mcls, name, bases, ns): + if "__annotations__" in ns: # from __future__ import annotations + annotations = ns["__annotations__"] + classvar_keys = { + key for key, value in annotations.items() + # Use string comparison for simplicity; a more robust solution + # could use annotationlib.ForwardRef.evaluate + if value.startswith("ClassVar") + } + classvars = {key: annotations[key] for key in classvar_keys} + ns["__annotations__"] = { + key: value for key, value in annotations.items() + if key not in classvar_keys + } + wrapped_annotate = None + elif annotate := annotationlib.get_annotate_from_class_namespace(ns): + annotations = annotationlib.call_annotate_function( + annotate, format=annotationlib.Format.FORWARDREF + ) + classvar_keys = { + key for key, value in annotations.items() + if typing.get_origin(value) is typing.ClassVar + } + classvars = {key: annotations[key] for key in classvar_keys} + + def wrapped_annotate(format): + 
annos = annotationlib.call_annotate_function(annotate, format, owner=typ) + return {key: value for key, value in annos.items() if key not in classvar_keys} + + else: # no annotations + classvars = {} + wrapped_annotate = None + typ = super().__new__(mcls, name, bases, ns) + + if wrapped_annotate is not None: + # Wrap the original __annotate__ with a wrapper that removes ClassVars + typ.__annotate__ = wrapped_annotate + typ.classvars = classvars # Store the ClassVars in a separate attribute + return typ + + +Limitations of the ``STRING`` format +------------------------------------ + +The :attr:`~Format.STRING` format is meant to approximate the source code +of the annotation, but the implementation strategy used means that it is not +always possible to recover the exact source code. + +First, the stringifier of course cannot recover any information that is not present in +the compiled code, including comments, whitespace, parenthesization, and operations that +get simplified by the compiler. + +Second, the stringifier can intercept almost all operations that involve names looked +up in some scope, but it cannot intercept operations that operate fully on constants. +As a corollary, this also means it is not safe to request the ``STRING`` format on +untrusted code: Python is powerful enough that it is possible to achieve arbitrary +code execution even with no access to any globals or builtins. For example: + +.. code-block:: pycon + + >>> def f(x: (1).__class__.__base__.__subclasses__()[-1].__init__.__builtins__["print"]("Hello world")): pass + ... + >>> annotationlib.get_annotations(f, format=annotationlib.Format.STRING) + Hello world + {'x': 'None'} + +.. note:: + This particular example works as of the time of writing, but it relies on + implementation details and is not guaranteed to work in the future.
+ +Among the different kinds of expressions that exist in Python, +as represented by the :mod:`ast` module, some expressions are supported, +meaning that the ``STRING`` format can generally recover the original source code; +others are unsupported, meaning that they may result in incorrect output or an error. + +The following are supported (sometimes with caveats): + +* :class:`ast.BinOp` +* :class:`ast.UnaryOp` + + * :class:`ast.Invert` (``~``), :class:`ast.UAdd` (``+``), and :class:`ast.USub` (``-``) are supported + * :class:`ast.Not` (``not``) is not supported + +* :class:`ast.Dict` (except when using ``**`` unpacking) +* :class:`ast.Set` +* :class:`ast.Compare` + + * :class:`ast.Eq` and :class:`ast.NotEq` are supported + * :class:`ast.Lt`, :class:`ast.LtE`, :class:`ast.Gt`, and :class:`ast.GtE` are supported, but the operand may be flipped + * :class:`ast.Is`, :class:`ast.IsNot`, :class:`ast.In`, and :class:`ast.NotIn` are not supported + +* :class:`ast.Call` (except when using ``**`` unpacking) +* :class:`ast.Constant` (though not the exact representation of the constant; for example, escape + sequences in strings are lost; hexadecimal numbers are converted to decimal) +* :class:`ast.Attribute` (assuming the value is not a constant) +* :class:`ast.Subscript` (assuming the value is not a constant) +* :class:`ast.Starred` (``*`` unpacking) +* :class:`ast.Name` +* :class:`ast.List` +* :class:`ast.Tuple` +* :class:`ast.Slice` + +The following are unsupported, but throw an informative error when encountered by the +stringifier: + +* :class:`ast.FormattedValue` (f-strings; error is not detected if conversion specifiers like ``!r`` + are used) +* :class:`ast.JoinedStr` (f-strings) + +The following are unsupported and result in incorrect output: + +* :class:`ast.BoolOp` (``and`` and ``or``) +* :class:`ast.IfExp` +* :class:`ast.Lambda` +* :class:`ast.ListComp` +* :class:`ast.SetComp` +* :class:`ast.DictComp` +* :class:`ast.GeneratorExp` + +The following are disallowed 
in annotation scopes and therefore not relevant: + +* :class:`ast.NamedExpr` (``:=``) +* :class:`ast.Await` +* :class:`ast.Yield` +* :class:`ast.YieldFrom` + + +Limitations of the ``FORWARDREF`` format +---------------------------------------- + +The :attr:`~Format.FORWARDREF` format aims to produce real values as much +as possible, with anything that cannot be resolved replaced with +:class:`ForwardRef` objects. It is affected by broadly the same limitations +as the :attr:`~Format.STRING` format: annotations that perform operations on +literals or that use unsupported expression types may raise exceptions when +evaluated using the :attr:`~Format.FORWARDREF` format. + +Below are a few examples of the behavior with unsupported expressions: + +.. code-block:: pycon + + >>> from annotationlib import get_annotations, Format + >>> def zerodiv(x: 1 / 0): ... + >>> get_annotations(zerodiv, format=Format.STRING) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> get_annotations(zerodiv, format=Format.FORWARDREF) + Traceback (most recent call last): + ... + ZeroDivisionError: division by zero + >>> def ifexp(x: 1 if y else 0): ... + >>> get_annotations(ifexp, format=Format.STRING) + {'x': '1'} diff --git a/Doc/library/archiving.rst b/Doc/library/archiving.rst index c9284949af4..da0b3f8c3e7 100644 --- a/Doc/library/archiving.rst +++ b/Doc/library/archiving.rst @@ -5,13 +5,15 @@ Data Compression and Archiving ****************************** The modules described in this chapter support data compression with the zlib, -gzip, bzip2 and lzma algorithms, and the creation of ZIP- and tar-format +gzip, bzip2, lzma, and zstd algorithms, and the creation of ZIP- and tar-format archives. See also :ref:`archiving-operations` provided by the :mod:`shutil` module. ..
toctree:: + compression.rst + compression.zstd.rst zlib.rst gzip.rst bz2.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 8d0116d8c06..29396c7a036 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -74,7 +74,7 @@ ArgumentParser objects prefix_chars='-', fromfile_prefix_chars=None, \ argument_default=None, conflict_handler='error', \ add_help=True, allow_abbrev=True, exit_on_error=True, \ - suggest_on_error=False) + *, suggest_on_error=False, color=False) Create a new :class:`ArgumentParser` object. All parameters should be passed as keyword arguments. Each parameter has its own more detailed description @@ -111,7 +111,7 @@ ArgumentParser objects * add_help_ - Add a ``-h/--help`` option to the parser (default: ``True``) * allow_abbrev_ - Allows long options to be abbreviated if the - abbreviation is unambiguous. (default: ``True``) + abbreviation is unambiguous (default: ``True``) * exit_on_error_ - Determines whether or not :class:`!ArgumentParser` exits with error info when an error occurs. (default: ``True``) @@ -119,6 +119,7 @@ ArgumentParser objects * suggest_on_error_ - Enables suggestions for mistyped argument choices and subparser names (default: ``False``) + * color_ - Allow color output (default: ``False``) .. versionchanged:: 3.5 *allow_abbrev* parameter was added. @@ -130,6 +131,9 @@ ArgumentParser objects .. versionchanged:: 3.9 *exit_on_error* parameter was added. + .. versionchanged:: 3.14 + *suggest_on_error* and *color* parameters were added. + The following sections describe how each of these are used. @@ -594,7 +598,8 @@ subparser names, the feature can be enabled by setting ``suggest_on_error`` to ``True``. 
Note that this only applies for arguments when the choices specified are strings:: - >>> parser = argparse.ArgumentParser(description='Process some integers.', suggest_on_error=True) + >>> parser = argparse.ArgumentParser(description='Process some integers.', + ... suggest_on_error=True) >>> parser.add_argument('--action', choices=['sum', 'max']) >>> parser.add_argument('integers', metavar='N', type=int, nargs='+', ... help='an integer for the accumulator') @@ -612,6 +617,33 @@ keyword argument:: .. versionadded:: 3.14 +color +^^^^^ + +By default, the help message is printed in plain text. If you want to allow +color in help messages, you can enable it by setting ``color`` to ``True``:: + + >>> parser = argparse.ArgumentParser(description='Process some integers.', + ... color=True) + >>> parser.add_argument('--action', choices=['sum', 'max']) + >>> parser.add_argument('integers', metavar='N', type=int, nargs='+', + ... help='an integer for the accumulator') + >>> parser.parse_args(['--help']) + +Even if a CLI author has enabled color, it can be +:ref:`controlled using environment variables `. + +If you're writing code that needs to be compatible with older Python versions +and want to opportunistically use ``color`` when it's available, you +can set it as an attribute after initializing the parser instead of using the +keyword argument:: + + >>> parser = argparse.ArgumentParser(description='Process some integers.') + >>> parser.color = True + +.. versionadded:: 3.14 + + The add_argument() method ------------------------- diff --git a/Doc/library/ast.rst b/Doc/library/ast.rst index fd901e23285..ca9a6b0712c 100644 --- a/Doc/library/ast.rst +++ b/Doc/library/ast.rst @@ -1,4 +1,4 @@ -:mod:`!ast` --- Abstract Syntax Trees +:mod:`!ast` --- Abstract syntax trees ===================================== .. module:: ast @@ -29,7 +29,7 @@ compiled into a Python code object using the built-in :func:`compile` function. ..
_abstract-grammar: -Abstract Grammar +Abstract grammar ---------------- The abstract grammar is currently defined as follows: @@ -1190,7 +1190,7 @@ Control flow .. doctest:: - >> print(ast.dump(ast.parse(""" + >>> print(ast.dump(ast.parse(""" ... while x: ... ... ... else: @@ -1761,6 +1761,43 @@ Pattern matching .. versionadded:: 3.10 + +Type annotations +^^^^^^^^^^^^^^^^ + +.. class:: TypeIgnore(lineno, tag) + + A ``# type: ignore`` comment located at *lineno*. + *tag* is the optional tag specified by the form ``# type: ignore <tag>``. + + .. doctest:: + + >>> print(ast.dump(ast.parse('x = 1 # type: ignore', type_comments=True), indent=4)) + Module( + body=[ + Assign( + targets=[ + Name(id='x', ctx=Store())], + value=Constant(value=1))], + type_ignores=[ + TypeIgnore(lineno=1, tag='')]) + >>> print(ast.dump(ast.parse('x: bool = 1 # type: ignore[assignment]', type_comments=True), indent=4)) + Module( + body=[ + AnnAssign( + target=Name(id='x', ctx=Store()), + annotation=Name(id='bool', ctx=Load()), + value=Constant(value=1), + simple=1)], + type_ignores=[ + TypeIgnore(lineno=1, tag='[assignment]')]) + + .. note:: + :class:`!TypeIgnore` nodes are not generated when the *type_comments* parameter + is set to ``False`` (default). See :func:`ast.parse` for more details. + + .. versionadded:: 3.8 + .. _ast-type-params: Type parameters @@ -2119,10 +2156,10 @@ Async and await of :class:`ast.operator`, :class:`ast.unaryop`, :class:`ast.cmpop`, :class:`ast.boolop` and :class:`ast.expr_context`) on the returned tree will be singletons. Changes to one will be reflected in all other - occurrences of the same value (e.g. :class:`ast.Add`). + occurrences of the same value (for example, :class:`ast.Add`). -:mod:`ast` Helpers +:mod:`ast` helpers ------------------ Apart from the node classes, the :mod:`ast` module defines these utility functions @@ -2447,7 +2484,7 @@ and classes for traversing abstract syntax trees: ..
_ast-compiler-flags: -Compiler Flags +Compiler flags -------------- The following flags may be passed to :func:`compile` in order to change @@ -2496,7 +2533,7 @@ effects on the compilation of a program: .. _ast-cli: -Command-Line Usage +Command-line usage ------------------ .. versionadded:: 3.9 @@ -2535,6 +2572,28 @@ The following options are accepted: Indentation of nodes in AST (number of spaces). +.. option:: --feature-version + + Python version in the format 3.x (for example, 3.10). Defaults to the + current version of the interpreter. + + .. versionadded:: 3.14 + +.. option:: -O + --optimize + + Optimization level for parser. Defaults to no optimization. + + .. versionadded:: 3.14 + +.. option:: --show-empty + + Show empty lists and fields that are ``None``. Defaults to not showing empty + objects. + + .. versionadded:: 3.14 + + If :file:`infile` is specified its contents are parsed to AST and dumped to stdout. Otherwise, the content is read from stdin. diff --git a/Doc/library/asyncio-dev.rst b/Doc/library/asyncio-dev.rst index 44b507a9811..7831b613bd4 100644 --- a/Doc/library/asyncio-dev.rst +++ b/Doc/library/asyncio-dev.rst @@ -46,10 +46,6 @@ In addition to enabling the debug mode, consider also: When the debug mode is enabled: -* asyncio checks for :ref:`coroutines that were not awaited - ` and logs them; this mitigates - the "forgotten await" pitfall. - * Many non-threadsafe asyncio APIs (such as :meth:`loop.call_soon` and :meth:`loop.call_at` methods) raise an exception if they are called from a wrong thread. diff --git a/Doc/library/asyncio-eventloop.rst b/Doc/library/asyncio-eventloop.rst index fdb75fe9e63..91970c28239 100644 --- a/Doc/library/asyncio-eventloop.rst +++ b/Doc/library/asyncio-eventloop.rst @@ -65,18 +65,14 @@ an event loop: .. note:: The :mod:`!asyncio` policy system is deprecated and will be removed - in Python 3.16; from there on, this function will always return the - running event loop. 
- + in Python 3.16; from there on, this function will return the current + running event loop if present; otherwise it will return the + loop set by :func:`set_event_loop`. .. function:: set_event_loop(loop) Set *loop* as the current event loop for the current OS thread. - .. deprecated:: 3.14 - The :func:`set_event_loop` function is deprecated and will be removed - in Python 3.16. - .. function:: new_event_loop() Create and return a new event loop object. @@ -365,7 +361,7 @@ Creating Futures and Tasks .. versionadded:: 3.5.2 -.. method:: loop.create_task(coro, *, name=None, context=None) +.. method:: loop.create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Schedule the execution of :ref:`coroutine <coroutine>` *coro*. Return a :class:`Task` object. @@ -374,6 +370,10 @@ Creating Futures and Tasks for interoperability. In this case, the result type is a subclass of :class:`Task`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. + If the *name* argument is provided and not ``None``, it is set as the name of the task using :meth:`Task.set_name`. @@ -381,12 +381,27 @@ Creating Futures and Tasks custom :class:`contextvars.Context` for the *coro* to run in. The current context copy is created when no *context* is provided. + An optional keyword-only *eager_start* argument allows specifying + if the task should execute eagerly during the call to create_task, + or be scheduled later. If *eager_start* is not passed, the mode set + by :meth:`loop.set_task_factory` will be used. + .. versionchanged:: 3.8 Added the *name* parameter. .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.13.3 + Added ``kwargs`` which passes on arbitrary extra parameters, including ``name`` and ``context``. + + ..
versionchanged:: 3.13.4 + Rolled back the change that passes on *name* and *context* (if it is None), + while still passing on other arbitrary keyword arguments (to avoid breaking backwards compatibility with 3.13.3). + + .. versionchanged:: 3.14 + All *kwargs* are now passed on. The *eager_start* parameter works with eager task factories. + .. method:: loop.set_task_factory(factory) Set a task factory that will be used by @@ -398,6 +413,16 @@ Creating Futures and Tasks event loop, and *coro* is a coroutine object. The callable must pass on all *kwargs*, and return a :class:`asyncio.Task`-compatible object. + .. versionchanged:: 3.13.3 + Required that all *kwargs* are passed on to :class:`asyncio.Task`. + + .. versionchanged:: 3.13.4 + *name* is no longer passed to task factories. *context* is no longer passed + to task factories if it is ``None``. + + .. versionchanged:: 3.14 + *name* and *context* are now unconditionally passed on to task factories again. + .. method:: loop.get_task_factory() Return a task factory or ``None`` if the default one is in use. @@ -1444,6 +1469,8 @@ Allows customizing how exceptions are handled in the event loop. * 'protocol' (optional): :ref:`Protocol ` instance; * 'transport' (optional): :ref:`Transport ` instance; * 'socket' (optional): :class:`socket.socket` instance; + * 'source_traceback' (optional): Traceback of the source; + * 'handle_traceback' (optional): Traceback of the handle; * 'asyncgen' (optional): Asynchronous generator that caused the exception. diff --git a/Doc/library/asyncio-future.rst b/Doc/library/asyncio-future.rst index 9dce0731411..32771ba72e0 100644 --- a/Doc/library/asyncio-future.rst +++ b/Doc/library/asyncio-future.rst @@ -51,12 +51,13 @@ Future Functions .. important:: - See also the :func:`create_task` function which is the - preferred way for creating new Tasks. - Save a reference to the result of this function, to avoid a task disappearing mid-execution. 
+ See also the :func:`create_task` function, which is the + preferred way of creating new tasks, or use :class:`asyncio.TaskGroup`, + which keeps a reference to the task internally. + .. versionchanged:: 3.5.1 The function accepts any :term:`awaitable` object. diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index b6ae4438608..b19ffa8213a 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -238,18 +238,24 @@ Creating Tasks ----------------------------------------------- -.. function:: create_task(coro, *, name=None, context=None) +.. function:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Wrap the *coro* :ref:`coroutine <coroutine>` into a :class:`Task` and schedule its execution. Return the Task object. - If *name* is not ``None``, it is set as the name of the task using - :meth:`Task.set_name`. + The full function signature is largely the same as that of the + :class:`Task` constructor (or factory) - all of the keyword arguments to + this function are passed through to that interface. An optional keyword-only *context* argument allows specifying a custom :class:`contextvars.Context` for the *coro* to run in. The current context copy is created when no *context* is provided. + An optional keyword-only *eager_start* argument allows specifying + if the task should execute eagerly during the call to create_task, + or be scheduled later. If *eager_start* is not passed, the mode set + by :meth:`loop.set_task_factory` will be used. + The task is executed in the loop returned by :func:`get_running_loop`, :exc:`RuntimeError` is raised if there is no running loop in current thread. @@ -290,6 +296,9 @@ Creating Tasks .. versionchanged:: 3.11 Added the *context* parameter. + .. versionchanged:: 3.14 + Added the *eager_start* parameter by passing on all *kwargs*. + Task Cancellation ================= @@ -330,7 +339,7 @@ and reliable way to wait for all tasks in the group to finish. .. versionadded:: 3.11 - ..
method:: create_task(coro, *, name=None, context=None) + .. method:: create_task(coro, *, name=None, context=None, eager_start=None, **kwargs) Create a task in this task group. The signature matches that of :func:`asyncio.create_task`. @@ -342,6 +351,10 @@ and reliable way to wait for all tasks in the group to finish. Close the given coroutine if the task group is not active. + .. versionchanged:: 3.14 + + Passes on all *kwargs* to :meth:`loop.create_task` + Example:: async def main(): @@ -1381,7 +1394,10 @@ Task Object Request the Task to be cancelled. - This arranges for a :exc:`CancelledError` exception to be thrown + If the Task is already *done* or *cancelled*, return ``False``, + otherwise, return ``True``. + + The method arranges for a :exc:`CancelledError` exception to be thrown into the wrapped coroutine on the next cycle of the event loop. The coroutine then has a chance to clean up or even deny the diff --git a/Doc/library/audit_events.rst b/Doc/library/audit_events.rst index a2a90a00d0c..73a58092024 100644 --- a/Doc/library/audit_events.rst +++ b/Doc/library/audit_events.rst @@ -23,25 +23,30 @@ information on handling these events. 
The following events are raised internally and do not correspond to any public API of CPython: -+--------------------------+-------------------------------------------+ -| Audit event | Arguments | -+==========================+===========================================+ -| _winapi.CreateFile | ``file_name``, ``desired_access``, | -| | ``share_mode``, ``creation_disposition``, | -| | ``flags_and_attributes`` | -+--------------------------+-------------------------------------------+ -| _winapi.CreateJunction | ``src_path``, ``dst_path`` | -+--------------------------+-------------------------------------------+ -| _winapi.CreateNamedPipe | ``name``, ``open_mode``, ``pipe_mode`` | -+--------------------------+-------------------------------------------+ -| _winapi.CreatePipe | | -+--------------------------+-------------------------------------------+ -| _winapi.CreateProcess | ``application_name``, ``command_line``, | -| | ``current_directory`` | -+--------------------------+-------------------------------------------+ -| _winapi.OpenProcess | ``process_id``, ``desired_access`` | -+--------------------------+-------------------------------------------+ -| _winapi.TerminateProcess | ``handle``, ``exit_code`` | -+--------------------------+-------------------------------------------+ -| ctypes.PyObj_FromPtr | ``obj`` | -+--------------------------+-------------------------------------------+ ++----------------------------+-------------------------------------------+ +| Audit event | Arguments | ++============================+===========================================+ +| _winapi.CreateFile | ``file_name``, ``desired_access``, | +| | ``share_mode``, ``creation_disposition``, | +| | ``flags_and_attributes`` | ++----------------------------+-------------------------------------------+ +| _winapi.CreateJunction | ``src_path``, ``dst_path`` | ++----------------------------+-------------------------------------------+ +| _winapi.CreateNamedPipe | ``name``, ``open_mode``, 
``pipe_mode`` | ++----------------------------+-------------------------------------------+ +| _winapi.CreatePipe | | ++----------------------------+-------------------------------------------+ +| _winapi.CreateProcess | ``application_name``, ``command_line``, | +| | ``current_directory`` | ++----------------------------+-------------------------------------------+ +| _winapi.OpenProcess | ``process_id``, ``desired_access`` | ++----------------------------+-------------------------------------------+ +| _winapi.TerminateProcess | ``handle``, ``exit_code`` | ++----------------------------+-------------------------------------------+ +| _posixsubprocess.fork_exec | ``exec_list``, ``args``, ``env`` | ++----------------------------+-------------------------------------------+ +| ctypes.PyObj_FromPtr | ``obj`` | ++----------------------------+-------------------------------------------+ + +.. versionadded:: 3.14 + The ``_posixsubprocess.fork_exec`` internal audit event. diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 834ab2536e6..529a7242443 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -15,14 +15,9 @@ This module provides functions for encoding binary data to printable ASCII characters and decoding such encodings back to binary data. -It provides encoding and decoding functions for the encodings specified in -:rfc:`4648`, which defines the Base16, Base32, and Base64 algorithms, -and for the de-facto standard Ascii85 and Base85 encodings. - -The :rfc:`4648` encodings are suitable for encoding binary data so that it can be -safely sent by email, used as parts of URLs, or included as part of an HTTP -POST request. The encoding algorithm is not the same as the -:program:`uuencode` program. +This includes the :ref:`encodings specified in ` +:rfc:`4648` (Base64, Base32 and Base16) +and the non-standard :ref:`Base85 encodings `. There are two interfaces provided by this module. 
The modern interface supports encoding :term:`bytes-like objects ` to ASCII @@ -30,7 +25,7 @@ supports encoding :term:`bytes-like objects ` to ASCII strings containing ASCII to :class:`bytes`. Both base-64 alphabets defined in :rfc:`4648` (normal, and URL- and filesystem-safe) are supported. -The legacy interface does not support decoding from strings, but it does +The :ref:`legacy interface ` does not support decoding from strings, but it does provide functions for encoding and decoding to and from :term:`file objects `. It only supports the Base64 standard alphabet, and it adds newlines every 76 characters as per :rfc:`2045`. Note that if you are looking @@ -46,7 +41,15 @@ package instead. Any :term:`bytes-like objects ` are now accepted by all encoding and decoding functions in this module. Ascii85/Base85 support added. -The modern interface provides: + +.. _base64-rfc-4648: + +RFC 4648 Encodings +------------------ + +The :rfc:`4648` encodings are suitable for encoding binary data so that it can be +safely sent by email, used as parts of URLs, or included as part of an HTTP +POST request. .. function:: b64encode(s, altchars=None) @@ -181,6 +184,26 @@ The modern interface provides: incorrectly padded or if there are non-alphabet characters present in the input. +.. _base64-base-85: + +Base85 Encodings +----------------- + +Base85 encoding is not formally specified but rather a de facto standard, +thus different systems perform the encoding differently. + +The :func:`a85encode` and :func:`b85encode` functions in this module are two implementations of +the de facto standard. You should call the function with the Base85 +implementation used by the software you intend to work with. 
+ +The two functions present in this module differ in how they handle the following: + +* Whether to include enclosing ``<~`` and ``~>`` markers +* Whether to include newline characters +* The set of ASCII characters used for encoding +* Handling of null bytes + +Refer to the documentation of the individual functions for more information. .. function:: a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -262,7 +285,10 @@ The modern interface provides: .. versionadded:: 3.13 -The legacy interface: +.. _base64-legacy: + +Legacy Interface +---------------- .. function:: decode(input, output) diff --git a/Doc/library/cmdline.rst b/Doc/library/cmdline.rst index 85e82f6292a..16c67ddbf7c 100644 --- a/Doc/library/cmdline.rst +++ b/Doc/library/cmdline.rst @@ -1,3 +1,5 @@ +.. _library-cmdline: + ++++++++++++++++++++++++++++++++++++ Modules command-line interface (CLI) ++++++++++++++++++++++++++++++++++++ @@ -27,7 +29,7 @@ The following modules have a command-line interface. * :mod:`pdb` * :ref:`pickle ` * :ref:`pickletools ` -* :mod:`platform` +* :ref:`platform ` * :mod:`poplib` * :ref:`profile ` * :mod:`pstats` diff --git a/Doc/library/code.rst b/Doc/library/code.rst index 8f7692df9fb..52587c4dd8f 100644 --- a/Doc/library/code.rst +++ b/Doc/library/code.rst @@ -22,6 +22,12 @@ build applications which provide an interactive interpreter prompt. it defaults to a newly created dictionary with key ``'__name__'`` set to ``'__console__'`` and key ``'__doc__'`` set to ``None``. + Note that function and class objects created under an + :class:`!InteractiveInterpreter` instance will belong to the namespace + specified by *locals*. + They are only pickleable if *locals* is the namespace of an existing + module. + ..
class:: InteractiveConsole(locals=None, filename="", local_exit=False) diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index a129a26190b..86511602fa5 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -55,7 +55,7 @@ any codec: The full details for each codec can also be looked up directly: -.. function:: lookup(encoding) +.. function:: lookup(encoding, /) Looks up the codec info in the Python codec registry and returns a :class:`CodecInfo` object as defined below. @@ -156,7 +156,7 @@ these additional functions which use :func:`lookup` for the codec lookup: Custom codecs are made available by registering a suitable codec search function: -.. function:: register(search_function) +.. function:: register(search_function, /) Register a codec search function. Search functions are expected to take one argument, being the encoding name in all lower case letters with hyphens @@ -168,7 +168,7 @@ function: Hyphens and spaces are converted to underscore. -.. function:: unregister(search_function) +.. function:: unregister(search_function, /) Unregister a codec search function and clear the registry's cache. If the search function is not registered, do nothing. @@ -208,6 +208,10 @@ wider range of codecs when working with binary files: .. versionchanged:: 3.11 The ``'U'`` mode has been removed. + .. deprecated:: 3.14 + + :func:`codecs.open` has been superseded by :func:`open`. + .. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict') @@ -416,7 +420,7 @@ In addition, the following error handler is specific to the given codecs: The set of allowed values can be extended by registering a new named error handler: -.. function:: register_error(name, error_handler) +.. function:: register_error(name, error_handler, /) Register the error handling function *error_handler* under the name *name*. 
The *error_handler* argument will be called during encoding and decoding @@ -442,7 +446,7 @@ handler: Previously registered error handlers (including the standard error handlers) can be looked up by name: -.. function:: lookup_error(name) +.. function:: lookup_error(name, /) Return the error handler previously registered under the name *name*. @@ -1107,7 +1111,7 @@ particular, the following variants typically exist: +-----------------+--------------------------------+--------------------------------+ | cp852 | 852, IBM852 | Central and Eastern Europe | +-----------------+--------------------------------+--------------------------------+ -| cp855 | 855, IBM855 | Bulgarian, Byelorussian, | +| cp855 | 855, IBM855 | Belarusian, Bulgarian, | | | | Macedonian, Russian, Serbian | +-----------------+--------------------------------+--------------------------------+ | cp856 | | Hebrew | @@ -1155,7 +1159,7 @@ particular, the following variants typically exist: +-----------------+--------------------------------+--------------------------------+ | cp1250 | windows-1250 | Central and Eastern Europe | +-----------------+--------------------------------+--------------------------------+ -| cp1251 | windows-1251 | Bulgarian, Byelorussian, | +| cp1251 | windows-1251 | Belarusian, Bulgarian, | | | | Macedonian, Russian, Serbian | +-----------------+--------------------------------+--------------------------------+ | cp1252 | windows-1252 | Western Europe | @@ -1220,7 +1224,7 @@ particular, the following variants typically exist: +-----------------+--------------------------------+--------------------------------+ | iso8859_4 | iso-8859-4, latin4, L4 | Baltic languages | +-----------------+--------------------------------+--------------------------------+ -| iso8859_5 | iso-8859-5, cyrillic | Bulgarian, Byelorussian, | +| iso8859_5 | iso-8859-5, cyrillic | Belarusian, Bulgarian, | | | | Macedonian, Russian, Serbian | 
+-----------------+--------------------------------+--------------------------------+ | iso8859_6 | iso-8859-6, arabic | Arabic | @@ -1257,7 +1261,7 @@ particular, the following variants typically exist: | | | | | | | .. versionadded:: 3.5 | +-----------------+--------------------------------+--------------------------------+ -| mac_cyrillic | maccyrillic | Bulgarian, Byelorussian, | +| mac_cyrillic | maccyrillic | Belarusian, Bulgarian, | | | | Macedonian, Russian, Serbian | +-----------------+--------------------------------+--------------------------------+ | mac_greek | macgreek | Greek | diff --git a/Doc/library/compression.rst b/Doc/library/compression.rst new file mode 100644 index 00000000000..618b4a3c2bd --- /dev/null +++ b/Doc/library/compression.rst @@ -0,0 +1,18 @@ +The :mod:`!compression` package +=============================== + +.. versionadded:: 3.14 + +The :mod:`!compression` package contains the canonical compression modules +containing interfaces to several different compression algorithms. Some of +these modules have historically been available as separate modules; those will +continue to be available under their original names for compatibility reasons, +and will not be removed without a deprecation cycle. The use of modules in +:mod:`!compression` is encouraged where practical. + +* :mod:`!compression.bz2` -- Re-exports :mod:`bz2` +* :mod:`!compression.gzip` -- Re-exports :mod:`gzip` +* :mod:`!compression.lzma` -- Re-exports :mod:`lzma` +* :mod:`!compression.zlib` -- Re-exports :mod:`zlib` +* :mod:`compression.zstd` -- Wrapper for the Zstandard compression library + diff --git a/Doc/library/compression.zstd.rst b/Doc/library/compression.zstd.rst new file mode 100644 index 00000000000..1e1802155a1 --- /dev/null +++ b/Doc/library/compression.zstd.rst @@ -0,0 +1,840 @@ +:mod:`!compression.zstd` --- Compression compatible with the Zstandard format +============================================================================= + +.. 
module:: compression.zstd + :synopsis: Low-level interface to compression and decompression routines in + the zstd library. + +.. versionadded:: 3.14 + +**Source code:** :source:`Lib/compression/zstd/__init__.py` + +-------------- + +This module provides classes and functions for compressing and decompressing +data using the Zstandard (or *zstd*) compression algorithm. The +`zstd manual `__ +describes Zstandard as "a fast lossless compression algorithm, targeting +real-time compression scenarios at zlib-level and better compression ratios." +Also included is a file interface that supports reading and writing the +contents of ``.zst`` files created by the :program:`zstd` utility, as well as +raw zstd compressed streams. + +The :mod:`!compression.zstd` module contains: + +* The :func:`.open` function and :class:`ZstdFile` class for reading and + writing compressed files. +* The :class:`ZstdCompressor` and :class:`ZstdDecompressor` classes for + incremental (de)compression. +* The :func:`compress` and :func:`decompress` functions for one-shot + (de)compression. +* The :func:`train_dict` and :func:`finalize_dict` functions and the + :class:`ZstdDict` class to train and manage Zstandard dictionaries. +* The :class:`CompressionParameter`, :class:`DecompressionParameter`, and + :class:`Strategy` classes for setting advanced (de)compression parameters. + + +Exceptions +---------- + +.. exception:: ZstdError + + This exception is raised when an error occurs during compression or + decompression, or while initializing the (de)compressor state. + + +Reading and writing compressed files +------------------------------------ + +.. function:: open(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None, encoding=None, errors=None, newline=None) + + Open a Zstandard-compressed file in binary or text mode, returning a + :term:`file object`. 
+ + The *file* argument can be either a file name (given as a + :class:`str`, :class:`bytes` or :term:`path-like <path-like object>` + object), in which case the named file is opened, or it can be an existing + file object to read from or write to. + + The *mode* argument can be either ``'rb'`` for reading (default), ``'wb'`` for + overwriting, ``'ab'`` for appending, or ``'xb'`` for exclusive creation. + These can equivalently be given as ``'r'``, ``'w'``, ``'a'``, and ``'x'`` + respectively. You may also open in text mode with ``'rt'``, ``'wt'``, + ``'at'``, and ``'xt'`` respectively. + + When reading, the *options* argument can be a dictionary providing advanced + decompression parameters; see :class:`DecompressionParameter` for detailed + information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced compression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be non-None. + The *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + In binary mode, this function is equivalent to the :class:`ZstdFile` + constructor: ``ZstdFile(file, mode, ...)``. In this case, the + *encoding*, *errors*, and *newline* parameters must not be provided. + + In text mode, a :class:`ZstdFile` object is created, and wrapped in an + :class:`io.TextIOWrapper` instance with the specified encoding, error + handling behavior, and line endings. + + +.. class:: ZstdFile(file, /, mode='rb', *, level=None, options=None, \ + zstd_dict=None) + + Open a Zstandard-compressed file in binary mode.
+ + A :class:`ZstdFile` can wrap an already-open :term:`file object`, or operate + directly on a named file. The *file* argument specifies either the file + object to wrap, or the name of the file to open (as a :class:`str`, + :class:`bytes` or :term:`path-like <path-like object>` object). If + wrapping an existing file object, the wrapped file will not be closed when + the :class:`ZstdFile` is closed. + + The *mode* argument can be either ``'rb'`` for reading (default), ``'wb'`` + for overwriting, ``'xb'`` for exclusive creation, or ``'ab'`` for appending. + These can equivalently be given as ``'r'``, ``'w'``, ``'x'`` and ``'a'`` + respectively. + + If *file* is a file object (rather than an actual file name), a mode of + ``'w'`` does not truncate the file, and is instead equivalent to ``'a'``. + + When reading, the *options* argument can be a dictionary + providing advanced decompression parameters; see + :class:`DecompressionParameter` for detailed information about supported + parameters. The *zstd_dict* argument is a :class:`ZstdDict` instance to be + used during decompression. When reading, if the *level* + argument is not None, a :exc:`!TypeError` will be raised. + + When writing, the *options* argument can be a dictionary + providing advanced compression parameters; see + :class:`CompressionParameter` for detailed information about supported + parameters. The *level* argument is the compression level to use when + writing compressed data. Only one of *level* or *options* may be passed. The + *zstd_dict* argument is a :class:`ZstdDict` instance to be used during + compression. + + :class:`!ZstdFile` supports all the members specified by + :class:`io.BufferedIOBase`, except for :meth:`~io.BufferedIOBase.detach` + and :meth:`~io.IOBase.truncate`. + Iteration and the :keyword:`with` statement are supported. + + The following method and attributes are also provided: + + .. method:: peek(size=-1) + + Return buffered data without advancing the file position.
At least one + byte of data will be returned, unless EOF has been reached. The exact + number of bytes returned is unspecified (the *size* argument is ignored). + + .. note:: While calling :meth:`peek` does not change the file position of + the :class:`ZstdFile`, it may change the position of the underlying + file object (for example, if the :class:`ZstdFile` was constructed by + passing a file object for *file*). + + .. attribute:: mode + + ``'rb'`` for reading and ``'wb'`` for writing. + + .. attribute:: name + + The name of the Zstandard file. Equivalent to the :attr:`~io.FileIO.name` + attribute of the underlying :term:`file object`. + + +Compressing and decompressing data in memory +-------------------------------------------- + +.. function:: compress(data, level=None, options=None, zstd_dict=None) + + Compress *data* (a :term:`bytes-like object`), returning the compressed + data as a :class:`bytes` object. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + +.. 
function:: decompress(data, zstd_dict=None, options=None) + + Decompress *data* (a :term:`bytes-like object`), returning the uncompressed + data as a :class:`bytes` object. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for decompression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + If *data* is the concatenation of multiple distinct compressed frames, + decompress all of these frames, and return the concatenation of the results. + + +.. class:: ZstdCompressor(level=None, options=None, zstd_dict=None) + + Create a compressor object, which can be used to compress data + incrementally. + + For a more convenient way of compressing a single chunk of data, see the + module-level function :func:`compress`. + + The *level* argument is an integer controlling the level of + compression. *level* is an alternative to setting + :attr:`CompressionParameter.compression_level` in *options*. Use + :meth:`~CompressionParameter.bounds` on + :attr:`~CompressionParameter.compression_level` to get the values that can + be passed for *level*. If advanced compression options are needed, the + *level* argument must be omitted and in the *options* dictionary the + :attr:`!CompressionParameter.compression_level` parameter should be set. + + The *options* argument is a Python dictionary containing advanced + compression parameters. The valid keys and values for compression parameters + are documented as part of the :class:`CompressionParameter` documentation. + + The *zstd_dict* argument is an optional instance of :class:`ZstdDict` + containing trained data to improve compression efficiency. The + function :func:`train_dict` can be used to generate a Zstandard dictionary. + + + ..
method:: compress(data, mode=ZstdCompressor.CONTINUE) + + Compress *data* (a :term:`bytes-like object`), returning a :class:`bytes` + object with compressed data if possible, or otherwise an empty + :class:`!bytes` object. Some of *data* may be buffered internally, for + use in later calls to :meth:`!compress` and :meth:`~.flush`. The returned + data should be concatenated with the output of any previous calls to + :meth:`~.compress`. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.CONTINUE`, :attr:`~.FLUSH_BLOCK`, + or :attr:`~.FLUSH_FRAME`. + + When all data has been provided to the compressor, call the + :meth:`~.flush` method to finish the compression process. If + :meth:`~.compress` is called with *mode* set to :attr:`~.FLUSH_FRAME`, + :meth:`~.flush` should not be called, as it would write out a new empty + frame. + + .. method:: flush(mode=ZstdCompressor.FLUSH_FRAME) + + Finish the compression process, returning a :class:`bytes` object + containing any data stored in the compressor's internal buffers. + + The *mode* argument is a :class:`ZstdCompressor` attribute, either + :attr:`~.FLUSH_BLOCK`, or :attr:`~.FLUSH_FRAME`. + + .. attribute:: CONTINUE + + Collect more data for compression, which may or may not generate output + immediately. This mode optimizes the compression ratio by maximizing the + amount of data per block and frame. + + .. attribute:: FLUSH_BLOCK + + Complete and write a block to the data stream. The data returned so far + can be immediately decompressed. Past data can still be referenced in + future blocks generated by calls to :meth:`~.compress`, + improving compression. + + .. attribute:: FLUSH_FRAME + + Complete and write out a frame. Future data provided to + :meth:`~.compress` will be written into a new frame and + *cannot* reference past data. + + +.. class:: ZstdDecompressor(zstd_dict=None, options=None) + + Create a decompressor object, which can be used to decompress data + incrementally. 
+ + For a more convenient way of decompressing an entire compressed stream at + once, see the module-level function :func:`decompress`. + + The *options* argument is a Python dictionary containing advanced + decompression parameters. The valid keys and values for decompression + parameters are documented as part of the :class:`DecompressionParameter` + documentation. + + The *zstd_dict* argument is an instance of :class:`ZstdDict` + containing trained data used during compression. This must be + the same Zstandard dictionary used during compression. + + .. note:: + This class does not transparently handle inputs containing multiple + compressed frames, unlike the :func:`decompress` function and + :class:`ZstdFile` class. To decompress a multi-frame input, you should + use :func:`decompress`, :class:`ZstdFile` if working with a + :term:`file object`, or multiple :class:`!ZstdDecompressor` instances. + + .. method:: decompress(data, max_length=-1) + + Decompress *data* (a :term:`bytes-like object`), returning + uncompressed data as bytes. Some of *data* may be buffered + internally, for use in later calls to :meth:`!decompress`. + The returned data should be concatenated with the output of any previous + calls to :meth:`!decompress`. + + If *max_length* is non-negative, the method returns at most *max_length* + bytes of decompressed data. If this limit is reached and further + output can be produced, the :attr:`~.needs_input` attribute will + be set to ``False``. In this case, the next call to + :meth:`~.decompress` may provide *data* as ``b''`` to obtain + more of the output. + + If all of the input data was decompressed and returned (either + because this was less than *max_length* bytes, or because + *max_length* was negative), the :attr:`~.needs_input` attribute + will be set to ``True``. + + Attempting to decompress data after the end of a frame will raise a + :exc:`ZstdError`.
Any data found after the end of the frame is ignored + and saved in the :attr:`~.unused_data` attribute. + + .. attribute:: eof + + ``True`` if the end-of-stream marker has been reached. + + .. attribute:: unused_data + + Data found after the end of the compressed stream. + + Before the end of the stream is reached, this will be ``b''``. + + .. attribute:: needs_input + + ``False`` if the :meth:`.decompress` method can provide more + decompressed data before requiring new compressed input. + + +Zstandard dictionaries +---------------------- + + +.. function:: train_dict(samples, dict_size) + + Train a Zstandard dictionary, returning a :class:`ZstdDict` instance. + Zstandard dictionaries enable more efficient compression of smaller sizes + of data, which is traditionally difficult to compress due to less + repetition. If you are compressing multiple similar groups of data (such as + similar files), Zstandard dictionaries can improve compression ratios and + speed significantly. + + The *samples* argument (an iterable of :class:`bytes` objects), is the + population of samples used to train the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. The Zstandard documentation suggests an + absolute maximum of no more than 100 KB, but the maximum can often be smaller + depending on the data. Larger dictionaries generally slow down compression, + but improve compression ratios. Smaller dictionaries lead to faster + compression, but reduce the compression ratio. + + +.. function:: finalize_dict(zstd_dict, /, samples, dict_size, level) + + An advanced function for converting a "raw content" Zstandard dictionary into + a regular Zstandard dictionary. "Raw content" dictionaries are a sequence of + bytes that do not need to follow the structure of a normal Zstandard + dictionary. 
+ + The *zstd_dict* argument is a :class:`ZstdDict` instance with + the :attr:`~ZstdDict.dict_content` containing the raw dictionary contents. + + The *samples* argument (an iterable of :class:`bytes` objects), contains + sample data for generating the Zstandard dictionary. + + The *dict_size* argument, an integer, is the maximum size (in bytes) the + Zstandard dictionary should be. See :func:`train_dict` for + suggestions on the maximum dictionary size. + + The *level* argument (an integer) is the compression level expected to be + passed to the compressors using this dictionary. The dictionary information + varies for each compression level, so tuning for the proper compression + level can make compression more efficient. + + +.. class:: ZstdDict(dict_content, /, *, is_raw=False) + + A wrapper around Zstandard dictionaries. Dictionaries can be used to improve + the compression of many small chunks of data. Use :func:`train_dict` if you + need to train a new dictionary from sample data. + + The *dict_content* argument (a :term:`bytes-like object`), is the already + trained dictionary information. + + The *is_raw* argument, a boolean, is an advanced parameter controlling the + meaning of *dict_content*. ``True`` means *dict_content* is a "raw content" + dictionary, without any format restrictions. ``False`` means *dict_content* + is an ordinary Zstandard dictionary, created from Zstandard functions, + for example, :func:`train_dict` or the external :program:`zstd` CLI. + + When passing a :class:`!ZstdDict` to a function, the + :attr:`!as_digested_dict` and :attr:`!as_undigested_dict` attributes can + control how the dictionary is loaded by passing them as the ``zstd_dict`` + argument, for example, ``compress(data, zstd_dict=zd.as_digested_dict)``. + Digesting a dictionary is a costly operation that occurs when loading a + Zstandard dictionary. 
When making multiple calls to compression or + decompression, passing a digested dictionary will reduce the overhead of + loading the dictionary. + + .. list-table:: Difference for compression + :widths: 10 14 10 + :header-rows: 1 + + * - + - Digested dictionary + - Undigested dictionary + * - Advanced parameters of the compressor which may be overridden by + the dictionary's parameters + - ``window_log``, ``hash_log``, ``chain_log``, ``search_log``, + ``min_match``, ``target_length``, ``strategy``, + ``enable_long_distance_matching``, ``ldm_hash_log``, + ``ldm_min_match``, ``ldm_bucket_size_log``, ``ldm_hash_rate_log``, + and some non-public parameters. + - None + * - :class:`!ZstdDict` internally caches the dictionary + - Yes. It's faster when loading a digested dictionary again with the + same compression level. + - No. If you wish to load an undigested dictionary multiple times, + consider reusing a compressor object. + + If passing a :class:`!ZstdDict` without any attribute, an undigested + dictionary is passed by default when compressing and a digested dictionary + is generated if necessary and passed by default when decompressing. + + .. attribute:: dict_content + + The content of the Zstandard dictionary, a ``bytes`` object. It's the + same as the *dict_content* argument in the ``__init__`` method. It can + be used with other programs, such as the ``zstd`` CLI program. + + .. attribute:: dict_id + + Identifier of the Zstandard dictionary, a non-negative int value. + + Non-zero means the dictionary is ordinary, created by Zstandard + functions and following the Zstandard format. + + ``0`` means a "raw content" dictionary, free of any format restriction, + used for advanced users. + + .. note:: + + The meaning of ``0`` for :attr:`!ZstdDict.dict_id` is different + from the ``dictionary_id`` attribute to the :func:`get_frame_info` + function. + + .. attribute:: as_digested_dict + + Load as a digested dictionary. + + .. 
attribute:: as_undigested_dict + + Load as an undigested dictionary. + + +Advanced parameter control +-------------------------- + +.. class:: CompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced compression parameter + keys that can be used when compressing data. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Parameters are optional; any omitted parameter will have its value selected + automatically. + + Example getting the lower and upper bound of :attr:`~.compression_level`:: + + lower, upper = CompressionParameter.compression_level.bounds() + + Example setting the :attr:`~.window_log` to the maximum size:: + + _lower, upper = CompressionParameter.window_log.bounds() + options = {CompressionParameter.window_log: upper} + compress(b'venezuelan beaver cheese', options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a compression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. For example, to get the valid values for + :attr:`~.compression_level`, one may check the result of + ``CompressionParameter.compression_level.bounds()``. + + Both the lower and upper bounds are inclusive. + + .. attribute:: compression_level + + A high-level means of setting other compression parameters that affect + the speed and ratio of compressing data. Setting the level to zero uses + :attr:`COMPRESSION_LEVEL_DEFAULT`. + + .. attribute:: window_log + + Maximum allowed back-reference distance the compressor can use when + compressing data, expressed as a power of two, ``1 << window_log`` bytes. + This parameter greatly influences the memory usage of compression. Higher + values require more memory but gain better compression values. + + A value of zero causes the value to be selected automatically. + + .. attribute:: hash_log + + Size of the initial probe table, as a power of two.
The resulting memory + usage is ``1 << (hash_log+2)`` bytes. Larger tables improve compression + ratio of strategies <= :attr:`~Strategy.dfast`, and improve compression + speed of strategies > :attr:`~Strategy.dfast`. + + A value of zero causes the value to be selected automatically. + + .. attribute:: chain_log + + Size of the multi-probe search table, as a power of two. The resulting + memory usage is ``1 << (chain_log+2)`` bytes. Larger tables result in + better and slower compression. This parameter has no effect for the + :attr:`~Strategy.fast` strategy. It's still useful when using the + :attr:`~Strategy.dfast` strategy, in which case it defines a secondary + probe table. + + A value of zero causes the value to be selected automatically. + + .. attribute:: search_log + + Number of search attempts, as a power of two. More attempts result in + better and slower compression. This parameter is useless for + :attr:`~Strategy.fast` and :attr:`~Strategy.dfast` strategies. + + A value of zero causes the value to be selected automatically. + + .. attribute:: min_match + + Minimum size of searched matches. Larger values increase compression and + decompression speed, but decrease ratio. Note that Zstandard can still + find matches of smaller size, it just tweaks its search algorithm to look + for this size and larger. For all strategies < :attr:`~Strategy.btopt`, + the effective minimum is ``4``; for all strategies + > :attr:`~Strategy.fast`, the effective maximum is ``6``. + + A value of zero causes the value to be selected automatically. + + .. attribute:: target_length + + The impact of this field depends on the selected :class:`Strategy`. + + For strategies :attr:`~Strategy.btopt`, :attr:`~Strategy.btultra` and + :attr:`~Strategy.btultra2`, the value is the length of a match + considered "good enough" to stop searching. Larger values make + compression ratios better, but make compression slower.
+ + For strategy :attr:`~Strategy.fast`, it is the distance between match + sampling. Larger values make compression faster, but with a worse + compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: strategy + + The higher the value of selected strategy, the more complex the + compression technique used by zstd, resulting in higher compression + ratios but slower compression. + + .. seealso:: :class:`Strategy` + + .. attribute:: enable_long_distance_matching + + Long distance matching can be used to improve compression for large + inputs by finding large matches at greater distances. It increases memory + usage and window size. + + ``True`` or ``1`` enable long distance matching while ``False`` or ``0`` + disable it. + + Enabling this parameter increases default + :attr:`~CompressionParameter.window_log` to 128 MiB except when expressly + set to a different value. This setting is enabled by default if + :attr:`!window_log` >= 128 MiB and the compression + strategy >= :attr:`~Strategy.btopt` (compression level 16+). + + .. attribute:: ldm_hash_log + + Size of the table for long distance matching, as a power of two. Larger + values increase memory usage and compression ratio, but decrease + compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_min_match + + Minimum match size for long distance matcher. Larger or too small values + can often decrease the compression ratio. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_bucket_size_log + + Log size of each bucket in the long distance matcher hash table for + collision resolution. Larger values improve collision resolution but + decrease compression speed. + + A value of zero causes the value to be selected automatically. + + .. attribute:: ldm_hash_rate_log + + Frequency of inserting/looking up entries into the long distance matcher + hash table. 
Larger values improve compression speed. Deviating far from + the default value will likely result in a compression ratio decrease. + + A value of zero causes the value to be selected automatically. + + .. attribute:: checksum_flag + + A four-byte checksum using XXHash64 of the uncompressed content is + written at the end of each frame. Zstandard's decompression code verifies + the checksum. If there is a mismatch a :class:`ZstdError` exception is + raised. + + ``True`` or ``1`` enable checksum generation while ``False`` or ``0`` + disable it. + + .. attribute:: dict_id_flag + + When compressing with a :class:`ZstdDict`, the dictionary's ID is written + into the frame header. + + ``True`` or ``1`` enable storing the dictionary ID while ``False`` or + ``0`` disable it. + + .. attribute:: nb_workers + + Select how many threads will be spawned to compress in parallel. When + :attr:`!nb_workers` > 0, enables multi-threaded compression, a value of + ``1`` means "one-thread multi-threaded mode". More workers improve speed, + but also increase memory usage and slightly reduce compression ratio. + + A value of zero disables multi-threading. + + .. attribute:: job_size + + Size of a compression job, in bytes. This value is enforced only when + :attr:`~CompressionParameter.nb_workers` >= 1. Each compression job is + completed in parallel, so this value can indirectly impact the number of + active threads. + + A value of zero causes the value to be selected automatically. + + .. attribute:: overlap_log + + Sets how much data is reloaded from previous jobs (threads) for new jobs + to be used by the look behind window during compression. This value is + only used when :attr:`~CompressionParameter.nb_workers` >= 1. Acceptable + values vary from 0 to 9. + + * 0 means dynamically set the overlap amount + * 1 means no overlap + * 9 means use a full window size from the previous job + + Each increment halves/doubles the overlap size. 
"8" means an overlap of + ``window_size/2``, "7" means an overlap of ``window_size/4``, etc. + +.. class:: DecompressionParameter() + + An :class:`~enum.IntEnum` containing the advanced decompression parameter + keys that can be used when decompressing data. Parameters are optional; any + omitted parameter will have it's value selected automatically. + + The :meth:`~.bounds` method can be used on any attribute to get the valid + values for that parameter. + + Example setting the :attr:`~.window_log_max` to the maximum size:: + + data = compress(b'Some very long buffer of bytes...') + + _lower, upper = DecompressionParameter.window_log_max.bounds() + + options = {DecompressionParameter.window_log_max: upper} + decompress(data, options=options) + + .. method:: bounds() + + Return the tuple of int bounds, ``(lower, upper)``, of a decompression + parameter. This method should be called on the attribute you wish to + retrieve the bounds of. + + Both the lower and upper bounds are inclusive. + + .. attribute:: window_log_max + + The base-two logarithm of the maximum size of the window used during + decompression. This can be useful to limit the amount of memory used when + decompressing data. A larger maximum window size leads to faster + decompression. + + A value of zero causes the value to be selected automatically. + + +.. class:: Strategy() + + An :class:`~enum.IntEnum` containing strategies for compression. + Higher-numbered strategies correspond to more complex and slower + compression. + + .. note:: + + The values of attributes of :class:`!Strategy` are not necessarily stable + across zstd versions. Only the ordering of the attributes may be relied + upon. The attributes are listed below in order. + + The following strategies are available: + + .. attribute:: fast + + .. attribute:: dfast + + .. attribute:: greedy + + .. attribute:: lazy + + .. attribute:: lazy2 + + .. attribute:: btlazy2 + + .. attribute:: btopt + + .. attribute:: btultra + + .. 
attribute:: btultra2 + + +Miscellaneous +------------- + +.. function:: get_frame_info(frame_buffer) + + Retrieve a :class:`FrameInfo` object containing metadata about a Zstandard + frame. Frames contain metadata related to the compressed data they hold. + + +.. class:: FrameInfo + + Metadata related to a Zstandard frame. + + .. attribute:: decompressed_size + + The size of the decompressed contents of the frame. + + .. attribute:: dictionary_id + + An integer representing the Zstandard dictionary ID needed for + decompressing the frame. ``0`` means the dictionary ID was not + recorded in the frame header. This may mean that a Zstandard dictionary + is not needed, or that the ID of a required dictionary was not recorded. + + +.. attribute:: COMPRESSION_LEVEL_DEFAULT + + The default compression level for Zstandard: ``3``. + + +.. attribute:: zstd_version_info + + Version number of the runtime zstd library as a tuple of integers + (major, minor, release). + + +Examples +-------- + +Reading in a compressed file: + +.. code-block:: python + + from compression import zstd + + with zstd.open("file.zst") as f: + file_content = f.read() + +Creating a compressed file: + +.. code-block:: python + + from compression import zstd + + data = b"Insert Data Here" + with zstd.open("file.zst", "w") as f: + f.write(data) + +Compressing data in memory: + +.. code-block:: python + + from compression import zstd + + data_in = b"Insert Data Here" + data_out = zstd.compress(data_in) + +Incremental compression: + +.. code-block:: python + + from compression import zstd + + comp = zstd.ZstdCompressor() + out1 = comp.compress(b"Some data\n") + out2 = comp.compress(b"Another piece of data\n") + out3 = comp.compress(b"Even more data\n") + out4 = comp.flush() + # Concatenate all the partial results: + result = b"".join([out1, out2, out3, out4]) + +Writing compressed data to an already-open file: + +.. 
code-block:: python + + from compression import zstd + + with open("myfile", "wb") as f: + f.write(b"This data will not be compressed\n") + with zstd.open(f, "w") as zstf: + zstf.write(b"This *will* be compressed\n") + f.write(b"Not compressed\n") + +Creating a compressed file using compression parameters: + +.. code-block:: python + + from compression import zstd + + options = { + zstd.CompressionParameter.checksum_flag: 1 + } + with zstd.open("file.zst", "w", options=options) as f: + f.write(b"Mind if I squeeze in?") diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 7efae9e628b..3c8d9ab111e 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -6,8 +6,9 @@ .. versionadded:: 3.2 -**Source code:** :source:`Lib/concurrent/futures/thread.py` -and :source:`Lib/concurrent/futures/process.py` +**Source code:** :source:`Lib/concurrent/futures/thread.py`, +:source:`Lib/concurrent/futures/process.py`, +and :source:`Lib/concurrent/futures/interpreter.py` -------------- diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index d3a81ce6bf7..5b733d5321e 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1779,7 +1779,8 @@ in :mod:`!ctypes`) which inherits from the private :class:`_CFuncPtr` class: .. audit-event:: ctypes.call_function func_pointer,arguments foreign-functions - Some ways to invoke foreign function calls may raise an auditing event + Some ways to invoke foreign function calls as well as some of the + functions in this module may raise an auditing event ``ctypes.call_function`` with arguments ``function pointer`` and ``arguments``. .. _ctypes-function-prototypes: @@ -2171,10 +2172,20 @@ Utility functions .. function:: POINTER(type, /) - Create and return a new ctypes pointer type. Pointer types are cached and + Create or return a ctypes pointer type. Pointer types are cached and reused internally, so calling this function repeatedly is cheap. 
*type* must be a ctypes type. + .. impl-detail:: + + The resulting pointer type is cached in the ``__pointer_type__`` + attribute of *type*. + It is possible to set this attribute before the first call to + ``POINTER`` in order to set a custom pointer type. + However, doing this is discouraged: manually creating a suitable + pointer type is difficult without relying on implementation + details that may change in future Python versions. + .. function:: pointer(obj, /) @@ -2339,6 +2350,16 @@ Data types library. *name* is the name of the symbol that exports the data, *library* is the loaded shared library. + Common class variables of ctypes data types: + + .. attribute:: __pointer_type__ + + The pointer type that was created by calling + :func:`POINTER` for corresponding ctypes data type. If a pointer type + was not yet created, the attribute is missing. + + .. versionadded:: 3.14 + Common instance variables of ctypes data types: .. attribute:: _b_base_ @@ -2733,6 +2754,16 @@ fields, or any other data types containing pointer type fields. when :attr:`_fields_` is assigned, otherwise it will have no effect. Setting this attribute to 0 is the same as not setting it at all. + This is only implemented for the MSVC-compatible memory layout. + + .. deprecated-removed:: 3.14 3.19 + + For historical reasons, if :attr:`!_pack_` is non-zero, + the MSVC-compatible layout will be used by default. + On non-Windows platforms, this default is deprecated and is slated to + become an error in Python 3.19. + If it is intended, set :attr:`~Structure._layout_` to ``'ms'`` + explicitly. .. attribute:: _align_ @@ -2761,12 +2792,15 @@ fields, or any other data types containing pointer type fields. Currently the default will be: - On Windows: ``"ms"`` - - When :attr:`~Structure._pack_` is specified: ``"ms"`` + - When :attr:`~Structure._pack_` is specified: ``"ms"``. + (This is deprecated; see :attr:`~Structure._pack_` documentation.) 
- Otherwise: ``"gcc-sysv"`` :attr:`!_layout_` must already be defined when :attr:`~Structure._fields_` is assigned, otherwise it will have no effect. + .. versionadded:: 3.14 + .. attribute:: _anonymous_ An optional sequence that lists the names of unnamed (anonymous) fields. diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 6c7fc721a3e..0b13c559295 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -68,6 +68,21 @@ The module :mod:`curses` defines the following exception: The module :mod:`curses` defines the following functions: +.. function:: assume_default_colors(fg, bg, /) + + Allow use of default values for colors on terminals supporting this feature. + Use this to support transparency in your application. + + * Assign terminal default foreground/background colors to color number ``-1``. + So ``init_pair(x, COLOR_RED, -1)`` will initialize pair *x* as red + on default background and ``init_pair(x, -1, COLOR_BLUE)`` will + initialize pair *x* as default foreground on blue. + + * Change the definition of the color-pair ``0`` to ``(fg, bg)``. + + .. versionadded:: 3.14 + + .. function:: baudrate() Return the output speed of the terminal in bits per second. On software @@ -290,9 +305,11 @@ The module :mod:`curses` defines the following functions: Change the definition of a color-pair. It takes three arguments: the number of the color-pair to be changed, the foreground color number, and the background color number. The value of *pair_number* must be between ``1`` and - ``COLOR_PAIRS - 1`` (the ``0`` color pair is wired to white on black and cannot - be changed). The value of *fg* and *bg* arguments must be between ``0`` and - ``COLORS - 1``, or, after calling :func:`use_default_colors`, ``-1``. + ``COLOR_PAIRS - 1`` (the ``0`` color pair can only be changed by + :func:`use_default_colors` and :func:`assume_default_colors`). 
+ The value of *fg* and *bg* arguments must be between ``0`` and + ``COLORS - 1``, or, after calling :func:`!use_default_colors` or + :func:`!assume_default_colors`, ``-1``. If the color-pair was previously initialized, the screen is refreshed and all occurrences of that color-pair are changed to the new definition. @@ -678,11 +695,7 @@ The module :mod:`curses` defines the following functions: .. function:: use_default_colors() - Allow use of default values for colors on terminals supporting this feature. Use - this to support transparency in your application. The default color is assigned - to the color number ``-1``. After calling this function, ``init_pair(x, - curses.COLOR_RED, -1)`` initializes, for instance, color pair *x* to a red - foreground color on the default background. + Equivalent to ``assume_default_colors(-1, -1)``. .. function:: wrapper(func, /, *args, **kwargs) @@ -975,6 +988,10 @@ the following methods and attributes: window.getstr(y, x, n) Read a bytes object from the user, with primitive line editing capacity. + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.getyx() @@ -1066,6 +1083,10 @@ the following methods and attributes: current cursor position, or at *y*, *x* if specified. Attributes are stripped from the characters. If *n* is specified, :meth:`instr` returns a string at most *n* characters long (exclusive of the trailing NUL). + The maximum value for *n* is 2047. + + .. versionchanged:: 3.14 + The maximum value for *n* was increased from 1023 to 2047. .. method:: window.is_linetouched(line) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 0bc171da4ee..f18c7cc9c02 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -304,9 +304,9 @@ Module contents .. versionadded:: 3.10 - - ``doc``: optional docstring for this field. + - *doc*: optional docstring for this field. - .. 
versionadded:: 3.13 + .. versionadded:: 3.14 If the default value of a field is specified by a call to :func:`!field`, then the class attribute for this field will be @@ -344,6 +344,15 @@ Module contents Other attributes may exist, but they are private and must not be inspected or relied on. +.. class:: InitVar + + ``InitVar[T]`` type annotations describe variables that are :ref:`init-only + `. Fields annotated with :class:`!InitVar` + are considered pseudo-fields, and thus are neither returned by the + :func:`fields` function nor used in any way except adding them as + parameters to :meth:`~object.__init__` and an optional + :meth:`__post_init__`. + .. function:: fields(class_or_instance) Returns a tuple of :class:`Field` objects that define the fields for this @@ -600,8 +609,8 @@ Init-only variables Another place where :func:`@dataclass ` inspects a type annotation is to determine if a field is an init-only variable. It does this by seeing -if the type of a field is of type ``dataclasses.InitVar``. If a field -is an ``InitVar``, it is considered a pseudo-field called an init-only +if the type of a field is of type :class:`InitVar`. If a field +is an :class:`InitVar`, it is considered a pseudo-field called an init-only field. As it is not a true field, it is not returned by the module-level :func:`fields` function. Init-only fields are added as parameters to the generated :meth:`~object.__init__` method, and are passed to diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 1ce2013f05d..16ed3215bc2 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -261,6 +261,22 @@ A :class:`timedelta` object represents a duration, the difference between two >>> (d.days, d.seconds, d.microseconds) (-1, 86399, 999999) + Since the string representation of :class:`!timedelta` objects can be confusing, + use the following recipe to produce a more readable format: + + .. code-block:: pycon + + >>> def pretty_timedelta(td): + ... 
if td.days >= 0: + ... return str(td) + ... return f'-({-td!s})' + ... + >>> d = timedelta(hours=-1) + >>> str(d) # not human-friendly + '-1 day, 23:00:00' + >>> pretty_timedelta(d) + '-(1:00:00)' + Class attributes: @@ -1486,11 +1502,11 @@ Instance methods: returned by :func:`time.time`. Naive :class:`.datetime` instances are assumed to represent local - time and this method relies on the platform C :c:func:`mktime` - function to perform the conversion. Since :class:`.datetime` - supports wider range of values than :c:func:`mktime` on many - platforms, this method may raise :exc:`OverflowError` or :exc:`OSError` - for times far in the past or far in the future. + time and this method relies on platform C functions to perform + the conversion. Since :class:`.datetime` supports a wider range of + values than the platform C functions on many platforms, this + method may raise :exc:`OverflowError` or :exc:`OSError` for times + far in the past or far in the future. For aware :class:`.datetime` instances, the return value is computed as:: @@ -1503,6 +1519,10 @@ Instance methods: The :meth:`timestamp` method uses the :attr:`.fold` attribute to disambiguate the times during a repeated interval. + .. versionchanged:: 3.6 + This method no longer relies on the platform C :c:func:`mktime` + function to perform conversions. + .. note:: There is no method to obtain the POSIX timestamp directly from a diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 1b334b0aa5f..10ddfa02b43 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -2,7 +2,7 @@ ===================================================================== .. module:: decimal - :synopsis: Implementation of the General Decimal Arithmetic Specification. + :synopsis: Implementation of the General Decimal Arithmetic Specification. .. moduleauthor:: Eric Price .. moduleauthor:: Facundo Batista @@ -121,7 +121,7 @@ reset them before monitoring a calculation. .. 
_decimal-tutorial: -Quick-start Tutorial +Quick-start tutorial -------------------- The usual start to using decimals is importing the module, viewing the current @@ -367,6 +367,8 @@ Decimal objects appears above. These include decimal digits from various other alphabets (for example, Arabic-Indic and Devanāgarī digits) along with the fullwidth digits ``'\uff10'`` through ``'\uff19'``. + Case is not significant, so, for example, ``inf``, ``Inf``, ``INFINITY``, + and ``iNfINity`` are all acceptable spellings for positive infinity. If *value* is a :class:`tuple`, it should have three components, a sign (``0`` for positive or ``1`` for negative), a :class:`tuple` of @@ -1029,6 +1031,14 @@ function to temporarily change the active context. .. versionchanged:: 3.11 :meth:`localcontext` now supports setting context attributes through the use of keyword arguments. +.. function:: IEEEContext(bits) + + Return a context object initialized to the proper values for one of the + IEEE interchange formats. The argument must be a multiple of 32 and less + than :const:`IEEE_CONTEXT_MAX_BITS`. + + .. versionadded:: 3.14 + New contexts can also be created using the :class:`Context` constructor described below. In addition, the module provides three pre-made contexts: @@ -1086,40 +1096,52 @@ In addition to the three supplied contexts, new contexts can be created with the default values are copied from the :const:`DefaultContext`. If the *flags* field is not specified or is :const:`None`, all flags are cleared. - *prec* is an integer in the range [``1``, :const:`MAX_PREC`] that sets - the precision for arithmetic operations in the context. + .. attribute:: prec - The *rounding* option is one of the constants listed in the section - `Rounding Modes`_. + An integer in the range [``1``, :const:`MAX_PREC`] that sets + the precision for arithmetic operations in the context. - The *traps* and *flags* fields list any signals to be set. 
Generally, new - contexts should only set traps and leave the flags clear. + .. attribute:: rounding - The *Emin* and *Emax* fields are integers specifying the outer limits allowable - for exponents. *Emin* must be in the range [:const:`MIN_EMIN`, ``0``], - *Emax* in the range [``0``, :const:`MAX_EMAX`]. + One of the constants listed in the section `Rounding Modes`_. - The *capitals* field is either ``0`` or ``1`` (the default). If set to - ``1``, exponents are printed with a capital ``E``; otherwise, a - lowercase ``e`` is used: ``Decimal('6.02e+23')``. + .. attribute:: traps + flags - The *clamp* field is either ``0`` (the default) or ``1``. - If set to ``1``, the exponent ``e`` of a :class:`Decimal` - instance representable in this context is strictly limited to the - range ``Emin - prec + 1 <= e <= Emax - prec + 1``. If *clamp* is - ``0`` then a weaker condition holds: the adjusted exponent of - the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is - ``1``, a large normal number will, where possible, have its - exponent reduced and a corresponding number of zeros added to its - coefficient, in order to fit the exponent constraints; this - preserves the value of the number but loses information about - significant trailing zeros. For example:: + Lists of any signals to be set. Generally, new contexts should only set + traps and leave the flags clear. - >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') - Decimal('1.23000E+999') + .. attribute:: Emin + Emax - A *clamp* value of ``1`` allows compatibility with the - fixed-width decimal interchange formats specified in IEEE 754. + Integers specifying the outer limits allowable for exponents. *Emin* must + be in the range [:const:`MIN_EMIN`, ``0``], *Emax* in the range + [``0``, :const:`MAX_EMAX`]. + + .. attribute:: capitals + + Either ``0`` or ``1`` (the default). 
If set to + ``1``, exponents are printed with a capital ``E``; otherwise, a + lowercase ``e`` is used: ``Decimal('6.02e+23')``. + + .. attribute:: clamp + + Either ``0`` (the default) or ``1``. If set to ``1``, the exponent ``e`` + of a :class:`Decimal` instance representable in this context is strictly + limited to the range ``Emin - prec + 1 <= e <= Emax - prec + 1``. + If *clamp* is ``0`` then a weaker condition holds: the adjusted exponent of + the :class:`Decimal` instance is at most :attr:`~Context.Emax`. When *clamp* is + ``1``, a large normal number will, where possible, have its + exponent reduced and a corresponding number of zeros added to its + coefficient, in order to fit the exponent constraints; this + preserves the value of the number but loses information about + significant trailing zeros. For example:: + + >>> Context(prec=6, Emax=999, clamp=1).create_decimal('1.23e999') + Decimal('1.23000E+999') + + A *clamp* value of ``1`` allows compatibility with the + fixed-width decimal interchange formats specified in IEEE 754. The :class:`Context` class defines several general purpose methods as well as a large number of methods for doing arithmetic directly in a given context. @@ -1550,18 +1572,19 @@ Constants The constants in this section are only relevant for the C module. They are also included in the pure Python version for compatibility. -+---------------------+---------------------+-------------------------------+ -| | 32-bit | 64-bit | -+=====================+=====================+===============================+ -| .. data:: MAX_PREC | ``425000000`` | ``999999999999999999`` | -+---------------------+---------------------+-------------------------------+ -| .. data:: MAX_EMAX | ``425000000`` | ``999999999999999999`` | -+---------------------+---------------------+-------------------------------+ -| .. data:: MIN_EMIN | ``-425000000`` | ``-999999999999999999`` | -+---------------------+---------------------+-------------------------------+ -| .. 
data:: MIN_ETINY | ``-849999999`` | ``-1999999999999999997`` | -+---------------------+---------------------+-------------------------------+ - ++---------------------------------+---------------------+-------------------------------+ +| | 32-bit | 64-bit | ++=================================+=====================+===============================+ +| .. data:: MAX_PREC | ``425000000`` | ``999999999999999999`` | ++---------------------------------+---------------------+-------------------------------+ +| .. data:: MAX_EMAX | ``425000000`` | ``999999999999999999`` | ++---------------------------------+---------------------+-------------------------------+ +| .. data:: MIN_EMIN | ``-425000000`` | ``-999999999999999999`` | ++---------------------------------+---------------------+-------------------------------+ +| .. data:: MIN_ETINY | ``-849999999`` | ``-1999999999999999997`` | ++---------------------------------+---------------------+-------------------------------+ +| .. data:: IEEE_CONTEXT_MAX_BITS | ``256`` | ``512`` | ++---------------------------------+---------------------+-------------------------------+ .. data:: HAVE_THREADS @@ -1758,7 +1781,7 @@ The following table summarizes the hierarchy of signals:: .. _decimal-notes: -Floating-Point Notes +Floating-point notes -------------------- diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 4fde5bf9038..44767b5dd2d 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1354,9 +1354,6 @@ iterations of the loop. If ``STACK[-1]`` is not ``None``, increments the bytecode counter by *delta*. ``STACK[-1]`` is popped. - This opcode is a pseudo-instruction, replaced in final bytecode by - the directed versions (forward/backward). - .. versionadded:: 3.11 .. versionchanged:: 3.12 @@ -1368,9 +1365,6 @@ iterations of the loop. If ``STACK[-1]`` is ``None``, increments the bytecode counter by *delta*. ``STACK[-1]`` is popped. 
- This opcode is a pseudo-instruction, replaced in final bytecode by - the directed versions (forward/backward). - .. versionadded:: 3.11 .. versionchanged:: 3.12 @@ -1673,7 +1667,7 @@ iterations of the loop. * ``oparg == 2``: call :func:`repr` on *value* * ``oparg == 3``: call :func:`ascii` on *value* - Used for implementing formatted literal strings (f-strings). + Used for implementing formatted string literals (f-strings). .. versionadded:: 3.13 @@ -1686,7 +1680,7 @@ iterations of the loop. result = value.__format__("") STACK.append(result) - Used for implementing formatted literal strings (f-strings). + Used for implementing formatted string literals (f-strings). .. versionadded:: 3.13 @@ -1699,7 +1693,7 @@ iterations of the loop. result = value.__format__(spec) STACK.append(result) - Used for implementing formatted literal strings (f-strings). + Used for implementing formatted string literals (f-strings). .. versionadded:: 3.13 diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index c5660433903..bb72032891e 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -428,6 +428,7 @@ The following exceptions are the exceptions that are usually raised. :exc:`PythonFinalizationError` during the Python finalization: * Creating a new Python thread. + * :meth:`Joining <threading.Thread.join>` a running daemon thread. * :func:`os.fork`. See also the :func:`sys.is_finalizing` function. @@ -435,6 +436,9 @@ The following exceptions are the exceptions that are usually raised. .. versionadded:: 3.13 Previously, a plain :exc:`RuntimeError` was raised. + .. versionchanged:: 3.14 + + :meth:`threading.Thread.join` can now raise this exception. .. exception:: RecursionError diff --git a/Doc/library/faulthandler.rst b/Doc/library/faulthandler.rst index b7df9f6b9bc..5058b85bffb 100644 --- a/Doc/library/faulthandler.rst +++ b/Doc/library/faulthandler.rst @@ -66,10 +66,41 @@ Dumping the traceback Added support for passing file descriptor to this function.
+Dumping the C stack +------------------- + +.. versionadded:: 3.14 + +.. function:: dump_c_stack(file=sys.stderr) + + Dump the C stack trace of the current thread into *file*. + + If the Python build does not support it or the operating system + does not provide a stack trace, then this prints an error in place + of a dumped C stack. + +.. _c-stack-compatibility: + +C Stack Compatibility +********************* + +If the system does not support the C-level :manpage:`backtrace(3)` +or :manpage:`dladdr1(3)`, then C stack dumps will not work. +An error will be printed instead of the stack. + +Additionally, some compilers do not support :term:`CPython's <CPython>` +implementation of C stack dumps. As a result, a different error may be printed +instead of the stack, even if the operating system supports dumping stacks. + +.. note:: + + Dumping C stacks can be arbitrarily slow, depending on the DWARF level + of the binaries in the call stack. + Fault handler state ------------------- -.. function:: enable(file=sys.stderr, all_threads=True) +.. function:: enable(file=sys.stderr, all_threads=True, c_stack=True) Enable the fault handler: install handlers for the :const:`~signal.SIGSEGV`, :const:`~signal.SIGFPE`, :const:`~signal.SIGABRT`, :const:`~signal.SIGBUS` @@ -81,6 +112,10 @@ Fault handler state The *file* must be kept open until the fault handler is disabled: see :ref:`issue with file descriptors <faulthandler-fd>`. + If *c_stack* is ``True``, then the C stack trace is printed after the Python + traceback, unless the system does not support it. See :func:`dump_c_stack` for + more information on compatibility. + .. versionchanged:: 3.5 Added support for passing file descriptor to this function. @@ -95,6 +130,9 @@ Fault handler state Only the current thread is dumped if the :term:`GIL` is disabled to prevent the risk of data races. + .. versionchanged:: 3.14 + The dump now displays the C stack trace if *c_stack* is true. + ..
function:: disable() Disable the fault handler: uninstall the signal handlers installed by diff --git a/Doc/library/fcntl.rst b/Doc/library/fcntl.rst index 7bd64e43dd5..5c078df44ff 100644 --- a/Doc/library/fcntl.rst +++ b/Doc/library/fcntl.rst @@ -79,68 +79,95 @@ descriptor. On macOS and NetBSD, the :mod:`!fcntl` module exposes the ``F_GETNOSIGPIPE`` and ``F_SETNOSIGPIPE`` constant. +.. versionchanged:: 3.14 + On Linux >= 6.1, the :mod:`!fcntl` module exposes the ``F_DUPFD_QUERY`` + constant to query a file descriptor pointing to the same file. + The module defines the following functions: -.. function:: fcntl(fd, cmd, arg=0) +.. function:: fcntl(fd, cmd, arg=0, /) Perform the operation *cmd* on file descriptor *fd* (file objects providing a :meth:`~io.IOBase.fileno` method are accepted as well). The values used for *cmd* are operating system dependent, and are available as constants in the :mod:`fcntl` module, using the same names as used in the relevant C - header files. The argument *arg* can either be an integer value, or a - :class:`bytes` object. With an integer value, the return value of this - function is the integer return value of the C :c:func:`fcntl` call. When - the argument is bytes it represents a binary structure, e.g. created by - :func:`struct.pack`. The binary data is copied to a buffer whose address is + header files. The argument *arg* can either be an integer value, a + :term:`bytes-like object`, or a string. + The type and size of *arg* must match the type and size of + the argument of the operation as specified in the relevant C documentation. + + When *arg* is an integer, the function returns the integer + return value of the C :c:func:`fcntl` call. + + When the argument is a bytes-like object, it represents a binary structure, + for example, created by :func:`struct.pack`. + A string value is encoded to binary using the UTF-8 encoding. + The binary data is copied to a buffer whose address is passed to the C :c:func:`fcntl` call.
The return value after a successful call is the contents of the buffer, converted to a :class:`bytes` object. The length of the returned object will be the same as the length of the - *arg* argument. This is limited to 1024 bytes. If the information returned - in the buffer by the operating system is larger than 1024 bytes, this is - most likely to result in a segmentation violation or a more subtle data - corruption. + *arg* argument. If the :c:func:`fcntl` call fails, an :exc:`OSError` is raised. + .. note:: + If the type or size of *arg* does not match the type or size + of the operation's argument (for example, if an integer is + passed when a pointer is expected, or the information returned in + the buffer by the operating system is larger than the size of *arg*), + this is most likely to result in a segmentation violation or + a more subtle data corruption. + .. audit-event:: fcntl.fcntl fd,cmd,arg fcntl.fcntl + .. versionchanged:: 3.14 + Added support for arbitrary :term:`bytes-like objects <bytes-like object>`, + not only :class:`bytes`. -.. function:: ioctl(fd, request, arg=0, mutate_flag=True) + .. versionchanged:: next + The size of bytes-like objects is no longer limited to 1024 bytes. + + +.. function:: ioctl(fd, request, arg=0, mutate_flag=True, /) This function is identical to the :func:`~fcntl.fcntl` function, except that the argument handling is even more complicated. - The *request* parameter is limited to values that can fit in 32-bits. + The *request* parameter is limited to values that can fit in 32-bits + or 64-bits, depending on the platform. Additional constants of interest for use as the *request* argument can be found in the :mod:`termios` module, under the same names as used in the relevant C header files. - The parameter *arg* can be one of an integer, an object supporting the - read-only buffer interface (like :class:`bytes`) or an object supporting - the read-write buffer interface (like :class:`bytearray`).
+ The parameter *arg* can be an integer, a :term:`bytes-like object`, + or a string. + The type and size of *arg* must match the type and size of + the argument of the operation as specified in the relevant C documentation. - In all but the last case, behaviour is as for the :func:`~fcntl.fcntl` + If *arg* does not support the read-write buffer interface or + the *mutate_flag* is false, behavior is as for the :func:`~fcntl.fcntl` function. - If a mutable buffer is passed, then the behaviour is determined by the value of - the *mutate_flag* parameter. - - If it is false, the buffer's mutability is ignored and behaviour is as for a - read-only buffer, except that the 1024 byte limit mentioned above is avoided -- - so long as the buffer you pass is at least as long as what the operating system - wants to put there, things should work. - - If *mutate_flag* is true (the default), then the buffer is (in effect) passed - to the underlying :func:`ioctl` system call, the latter's return code is + If *arg* supports the read-write buffer interface (like :class:`bytearray`) + and *mutate_flag* is true (the default), then the buffer is (in effect) passed + to the underlying :c:func:`!ioctl` system call, the latter's return code is passed back to the calling Python, and the buffer's new contents reflect the - action of the :func:`ioctl`. This is a slight simplification, because if the + action of the :c:func:`ioctl`. This is a slight simplification, because if the supplied buffer is less than 1024 bytes long it is first copied into a static buffer 1024 bytes long which is then passed to :func:`ioctl` and copied back into the supplied buffer. If the :c:func:`ioctl` call fails, an :exc:`OSError` exception is raised. + .. 
note:: + If the type or size of *arg* does not match the type or size + of the operation's argument (for example, if an integer is + passed when a pointer is expected, or the information returned in + the buffer by the operating system is larger than the size of *arg*), + this is most likely to result in a segmentation violation or + a more subtle data corruption. + An example:: >>> import array, fcntl, struct, termios, os @@ -156,8 +183,15 @@ The module defines the following functions: .. audit-event:: fcntl.ioctl fd,request,arg fcntl.ioctl + .. versionchanged:: 3.14 + The GIL is always released during a system call. + System calls failing with EINTR are automatically retried. -.. function:: flock(fd, operation) + .. versionchanged:: next + The size of not mutated bytes-like objects is no longer + limited to 1024 bytes. + +.. function:: flock(fd, operation, /) Perform the lock operation *operation* on file descriptor *fd* (file objects providing a :meth:`~io.IOBase.fileno` method are accepted as well). See the Unix manual @@ -169,7 +203,7 @@ The module defines the following functions: .. audit-event:: fcntl.flock fd,operation fcntl.flock -.. function:: lockf(fd, cmd, len=0, start=0, whence=0) +.. function:: lockf(fd, cmd, len=0, start=0, whence=0, /) This is essentially a wrapper around the :func:`~fcntl.fcntl` locking calls. *fd* is the file descriptor (file objects providing a :meth:`~io.IOBase.fileno` diff --git a/Doc/library/fnmatch.rst b/Doc/library/fnmatch.rst index 8674e855b8e..12e61bc36f5 100644 --- a/Doc/library/fnmatch.rst +++ b/Doc/library/fnmatch.rst @@ -111,7 +111,7 @@ functions: :func:`fnmatch`, :func:`fnmatchcase`, :func:`.filter`. 
>>> >>> regex = fnmatch.translate('*.txt') >>> regex - '(?s:.*\\.txt)\\Z' + '(?s:.*\\.txt)\\z' >>> reobj = re.compile(regex) >>> reobj.match('foobar.txt') diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7e367a0f2b6..2ecce3dba5a 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1154,44 +1154,44 @@ are always available. They are listed here in alphabetical order. .. function:: locals() - Return a mapping object representing the current local symbol table, with - variable names as the keys, and their currently bound references as the - values. + Return a mapping object representing the current local symbol table, with + variable names as the keys, and their currently bound references as the + values. - At module scope, as well as when using :func:`exec` or :func:`eval` with - a single namespace, this function returns the same namespace as - :func:`globals`. + At module scope, as well as when using :func:`exec` or :func:`eval` with + a single namespace, this function returns the same namespace as + :func:`globals`. - At class scope, it returns the namespace that will be passed to the - metaclass constructor. + At class scope, it returns the namespace that will be passed to the + metaclass constructor. - When using ``exec()`` or ``eval()`` with separate local and global - arguments, it returns the local namespace passed in to the function call. + When using ``exec()`` or ``eval()`` with separate local and global + arguments, it returns the local namespace passed in to the function call. - In all of the above cases, each call to ``locals()`` in a given frame of - execution will return the *same* mapping object. Changes made through - the mapping object returned from ``locals()`` will be visible as assigned, - reassigned, or deleted local variables, and assigning, reassigning, or - deleting local variables will immediately affect the contents of the - returned mapping object. 
+ In all of the above cases, each call to ``locals()`` in a given frame of + execution will return the *same* mapping object. Changes made through + the mapping object returned from ``locals()`` will be visible as assigned, + reassigned, or deleted local variables, and assigning, reassigning, or + deleting local variables will immediately affect the contents of the + returned mapping object. - In an :term:`optimized scope` (including functions, generators, and - coroutines), each call to ``locals()`` instead returns a fresh dictionary - containing the current bindings of the function's local variables and any - nonlocal cell references. In this case, name binding changes made via the - returned dict are *not* written back to the corresponding local variables - or nonlocal cell references, and assigning, reassigning, or deleting local - variables and nonlocal cell references does *not* affect the contents - of previously returned dictionaries. + In an :term:`optimized scope` (including functions, generators, and + coroutines), each call to ``locals()`` instead returns a fresh dictionary + containing the current bindings of the function's local variables and any + nonlocal cell references. In this case, name binding changes made via the + returned dict are *not* written back to the corresponding local variables + or nonlocal cell references, and assigning, reassigning, or deleting local + variables and nonlocal cell references does *not* affect the contents + of previously returned dictionaries. - Calling ``locals()`` as part of a comprehension in a function, generator, or - coroutine is equivalent to calling it in the containing scope, except that - the comprehension's initialised iteration variables will be included. In - other scopes, it behaves as if the comprehension were running as a nested - function. 
+ Calling ``locals()`` as part of a comprehension in a function, generator, or + coroutine is equivalent to calling it in the containing scope, except that + the comprehension's initialised iteration variables will be included. In + other scopes, it behaves as if the comprehension were running as a nested + function. - Calling ``locals()`` as part of a generator expression is equivalent to - calling it in a nested generator function. + Calling ``locals()`` as part of a generator expression is equivalent to + calling it in a nested generator function. .. versionchanged:: 3.12 The behaviour of ``locals()`` in a comprehension has been updated as diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 3a933dff057..3e75621be6d 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -403,8 +403,7 @@ The :mod:`functools` module defines the following functions: >>> remove_first_dear(message) 'Hello, dear world!' - :data:`!Placeholder` has no special treatment when used in a keyword - argument to :func:`!partial`. + :data:`!Placeholder` cannot be passed to :func:`!partial` as a keyword argument. .. versionchanged:: 3.14 Added support for :data:`Placeholder` in positional arguments. diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 480a9dec7f1..7ccb0e6bdf9 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -128,6 +128,11 @@ The :mod:`gc` module provides the following functions: starts. For each collection, all the objects in the young generation and some fraction of the old generation is collected. + In the free-threaded build, the increase in process memory usage is also + checked before running the collector. If the memory usage has not increased + by 10% since the last collection and the net number of object allocations + has not exceeded 40 times *threshold0*, the collection is not run. + The fraction of the old generation that is collected is **inversely** proportional to *threshold1*. 
The larger *threshold1* is, the slower objects in the old generation are collected. diff --git a/Doc/library/getpass.rst b/Doc/library/getpass.rst index 3b5296f9ec6..0fb0fc88683 100644 --- a/Doc/library/getpass.rst +++ b/Doc/library/getpass.rst @@ -16,7 +16,7 @@ The :mod:`getpass` module provides two functions: -.. function:: getpass(prompt='Password: ', stream=None) +.. function:: getpass(prompt='Password: ', stream=None, *, echo_char=None) Prompt the user for a password without echoing. The user is prompted using the string *prompt*, which defaults to ``'Password: '``. On Unix, the @@ -25,6 +25,12 @@ The :mod:`getpass` module provides two functions: (:file:`/dev/tty`) or if that is unavailable to ``sys.stderr`` (this argument is ignored on Windows). + The *echo_char* argument controls how user input is displayed while typing. + If *echo_char* is ``None`` (default), input remains hidden. Otherwise, + *echo_char* must be a printable ASCII string and each typed character + is replaced by it. For example, ``echo_char='*'`` will display + asterisks instead of the actual input. + If echo free input is unavailable getpass() falls back to printing a warning message to *stream* and reading from ``sys.stdin`` and issuing a :exc:`GetPassWarning`. @@ -33,6 +39,9 @@ The :mod:`getpass` module provides two functions: If you call getpass from within IDLE, the input may be done in the terminal you launched IDLE from rather than the idle window itself. + .. versionchanged:: 3.14 + Added the *echo_char* parameter for keyboard feedback. + .. exception:: GetPassWarning A :exc:`UserWarning` subclass issued when password input may be echoed. 
diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 684466d354a..59ad1b07f27 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -134,7 +134,7 @@ The :mod:`glob` module defines the following functions: >>> >>> regex = glob.translate('**/*.txt', recursive=True, include_hidden=True) >>> regex - '(?s:(?:.+/)?[^/]*\\.txt)\\Z' + '(?s:(?:.+/)?[^/]*\\.txt)\\z' >>> reobj = re.compile(regex) >>> reobj.match('foo/bar/baz.txt') diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 4222f1bb1f4..c9d96085ef7 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -258,6 +258,10 @@ Example of how to GZIP compress a binary string:: The basic data compression module needed to support the :program:`gzip` file format. + In case gzip (de)compression is a bottleneck, the `python-isal`_ + package speeds up (de)compression with a mostly compatible API. + + .. _python-isal: https://github.com/pycompression/python-isal .. program:: gzip diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index dffb167c747..4818a4944a5 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -20,13 +20,11 @@ -------------- -This module implements a common interface to many different secure hash and -message digest algorithms. Included are the FIPS secure hash algorithms SHA1, -SHA224, SHA256, SHA384, SHA512, (defined in `the FIPS 180-4 standard`_), -the SHA-3 series (defined in `the FIPS 202 standard`_) as well as RSA's MD5 -algorithm (defined in internet :rfc:`1321`). The terms "secure hash" and -"message digest" are interchangeable. Older algorithms were called message -digests. The modern term is secure hash. +This module implements a common interface to many different hash algorithms. 
+Included are the FIPS secure hash algorithms SHA224, SHA256, SHA384, SHA512, +(defined in `the FIPS 180-4 standard`_), the SHA-3 series (defined in `the FIPS +202 standard`_) as well as the legacy algorithms SHA1 (`formerly part of FIPS`_) +and the MD5 algorithm (defined in internet :rfc:`1321`). .. note:: @@ -272,7 +270,10 @@ a file or file-like object. *fileobj* must be a file-like object opened for reading in binary mode. It accepts file objects from builtin :func:`open`, :class:`~io.BytesIO` instances, SocketIO objects from :meth:`socket.socket.makefile`, and - similar. The function may bypass Python's I/O and use the file descriptor + similar. *fileobj* must be opened in blocking mode, otherwise a + :exc:`BlockingIOError` may be raised. + + The function may bypass Python's I/O and use the file descriptor from :meth:`~io.IOBase.fileno` directly. *fileobj* must be assumed to be in an unknown state after this function returns or raises. It is up to the caller to close *fileobj*. @@ -283,7 +284,7 @@ a file or file-like object. Example: >>> import io, hashlib, hmac - >>> with open(hashlib.__file__, "rb") as f: + >>> with open("library/hashlib.rst", "rb") as f: ... digest = hashlib.file_digest(f, "sha256") ... >>> digest.hexdigest() # doctest: +ELLIPSIS @@ -301,6 +302,10 @@ a file or file-like object. .. versionadded:: 3.11 + .. versionchanged:: 3.14 + Now raises a :exc:`BlockingIOError` if the file is opened in non-blocking + mode. Previously, spurious null bytes were added to the digest. + Key derivation -------------- @@ -812,6 +817,7 @@ Domain Dedication 1.0 Universal: .. _the FIPS 180-4 standard: https://csrc.nist.gov/pubs/fips/180-4/upd1/final .. _the FIPS 202 standard: https://csrc.nist.gov/pubs/fips/202/final .. _HACL\* project: https://github.com/hacl-star/hacl-star +.. _formerly part of FIPS: https://csrc.nist.gov/news/2023/decision-to-revise-fips-180-4 ..
_hashlib-seealso: diff --git a/Doc/library/heapq-binary-tree.svg b/Doc/library/heapq-binary-tree.svg new file mode 100644 index 00000000000..074a9a44275 --- /dev/null +++ b/Doc/library/heapq-binary-tree.svg @@ -0,0 +1,211 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Doc/library/heapq.rst b/Doc/library/heapq.rst index d3c4b920ba5..462b65bc7af 100644 --- a/Doc/library/heapq.rst +++ b/Doc/library/heapq.rst @@ -16,40 +16,56 @@ This module provides an implementation of the heap queue algorithm, also known as the priority queue algorithm. -Heaps are binary trees for which every parent node has a value less than or -equal to any of its children. We refer to this condition as the heap invariant. +Min-heaps are binary trees for which every parent node has a value less than +or equal to any of its children. +We refer to this condition as the heap invariant. -This implementation uses arrays for which -``heap[k] <= heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]`` for all *k*, counting -elements from zero. For the sake of comparison, non-existing elements are -considered to be infinite. The interesting property of a heap is that its -smallest element is always the root, ``heap[0]``. +For min-heaps, this implementation uses lists for which +``heap[k] <= heap[2*k+1]`` and ``heap[k] <= heap[2*k+2]`` for all *k* for which +the compared elements exist. Elements are counted from zero. The interesting +property of a min-heap is that its smallest element is always the root, +``heap[0]``. 
-The API below differs from textbook heap algorithms in two aspects: (a) We use -zero-based indexing. This makes the relationship between the index for a node -and the indexes for its children slightly less obvious, but is more suitable -since Python uses zero-based indexing. (b) Our pop method returns the smallest -item, not the largest (called a "min heap" in textbooks; a "max heap" is more -common in texts because of its suitability for in-place sorting). +Max-heaps satisfy the reverse invariant: every parent node has a value +*greater* than or equal to any of its children. These are implemented as lists for which +``maxheap[2*k+1] <= maxheap[k]`` and ``maxheap[2*k+2] <= maxheap[k]`` for all +*k* for which the compared elements exist. +The root, ``maxheap[0]``, contains the *largest* element; +``heap.sort(reverse=True)`` maintains the max-heap invariant. -These two make it possible to view the heap as a regular Python list without -surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` maintains the -heap invariant! +The :mod:`!heapq` API differs from textbook heap algorithms in two aspects: (a) +We use zero-based indexing. This makes the relationship between the index for +a node and the indexes for its children slightly less obvious, but is more +suitable since Python uses zero-based indexing. (b) Textbooks often focus on +max-heaps, due to their suitability for in-place sorting. Our implementation +favors min-heaps as they better correspond to Python :class:`lists <list>`. -To create a heap, use a list initialized to ``[]``, or you can transform a -populated list into a heap via function :func:`heapify`. +These two aspects make it possible to view the heap as a regular Python list +without surprises: ``heap[0]`` is the smallest item, and ``heap.sort()`` +maintains the heap invariant! -The following functions are provided: +Like :meth:`list.sort`, this implementation uses only the ``<`` operator
+ +In the API below, and in this documentation, the unqualified term *heap* +generally refers to a min-heap. +The API for max-heaps is named using a ``_max`` suffix. + +To create a heap, use a list initialized as ``[]``, or transform an existing list +into a min-heap or max-heap using the :func:`heapify` or :func:`heapify_max` +functions, respectively. + +The following functions are provided for min-heaps: .. function:: heappush(heap, item) - Push the value *item* onto the *heap*, maintaining the heap invariant. + Push the value *item* onto the *heap*, maintaining the min-heap invariant. .. function:: heappop(heap) - Pop and return the smallest item from the *heap*, maintaining the heap + Pop and return the smallest item from the *heap*, maintaining the min-heap invariant. If the heap is empty, :exc:`IndexError` is raised. To access the smallest item without popping it, use ``heap[0]``. @@ -63,7 +79,7 @@ The following functions are provided: .. function:: heapify(x) - Transform list *x* into a heap, in-place, in linear time. + Transform list *x* into a min-heap, in-place, in linear time. .. function:: heapreplace(heap, item) @@ -82,6 +98,56 @@ The following functions are provided: on the heap. +For max-heaps, the following functions are provided: + + +.. function:: heapify_max(x) + + Transform list *x* into a max-heap, in-place, in linear time. + + .. versionadded:: 3.14 + + +.. function:: heappush_max(heap, item) + + Push the value *item* onto the max-heap *heap*, maintaining the max-heap + invariant. + + .. versionadded:: 3.14 + + +.. function:: heappop_max(heap) + + Pop and return the largest item from the max-heap *heap*, maintaining the + max-heap invariant. If the max-heap is empty, :exc:`IndexError` is raised. + To access the largest item without popping it, use ``maxheap[0]``. + + .. versionadded:: 3.14 + + +.. function:: heappushpop_max(heap, item) + + Push *item* on the max-heap *heap*, then pop and return the largest item + from *heap*. 
+ The combined action runs more efficiently than :func:`heappush_max` + followed by a separate call to :func:`heappop_max`. + + .. versionadded:: 3.14 + + +.. function:: heapreplace_max(heap, item) + + Pop and return the largest item from the max-heap *heap* and also push the + new *item*. + The max-heap size doesn't change. If the max-heap is empty, + :exc:`IndexError` is raised. + + The value returned may be smaller than the *item* added. Refer to the + analogous function :func:`heapreplace` for detailed usage notes. + + .. versionadded:: 3.14 + + The module also offers three general purpose functions based on heaps. @@ -246,17 +312,12 @@ elements are considered to be infinite. The interesting property of a heap is that ``a[0]`` is always its smallest element. The strange invariant above is meant to be an efficient memory representation -for a tournament. The numbers below are *k*, not ``a[k]``:: +for a tournament. The numbers below are *k*, not ``a[k]``: - 0 - - 1 2 - - 3 4 5 6 - - 7 8 9 10 11 12 13 14 - - 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 +.. figure:: heapq-binary-tree.svg + :class: invert-in-dark-mode + :align: center + :alt: Example (min-heap) binary tree. In the tree above, each cell *k* is topping ``2*k+1`` and ``2*k+2``. In a usual binary tournament we see in sports, each cell is the winner over the two cells diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst index 6d433b5a04f..dd67fc34e85 100644 --- a/Doc/library/html.parser.rst +++ b/Doc/library/html.parser.rst @@ -43,7 +43,9 @@ Example HTML Parser Application As a basic example, below is a simple HTML parser that uses the :class:`HTMLParser` class to print out start tags, end tags, and data -as they are encountered:: +as they are encountered: + +.. testcode:: from html.parser import HTMLParser @@ -63,7 +65,7 @@ as they are encountered:: The output will then be: -.. code-block:: none +.. 
testoutput:: Encountered a start tag: html Encountered a start tag: head @@ -230,7 +232,9 @@ Examples -------- The following class implements a parser that will be used to illustrate more -examples:: +examples: + +.. testcode:: from html.parser import HTMLParser from html.entities import name2codepoint @@ -266,13 +270,17 @@ examples:: parser = MyHTMLParser() -Parsing a doctype:: +Parsing a doctype: + +.. doctest:: >>> parser.feed('') Decl : DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd" -Parsing an element with a few attributes and a title:: +Parsing an element with a few attributes and a title: + +.. doctest:: >>> parser.feed('The Python logo') Start tag: img @@ -285,7 +293,9 @@ Parsing an element with a few attributes and a title:: End tag : h1 The content of ``script`` and ``style`` elements is returned as is, without -further parsing:: +further parsing: + +.. doctest:: >>> parser.feed('') Start tag: style @@ -300,16 +310,25 @@ further parsing:: Data : alert("hello!"); End tag : script -Parsing comments:: +Parsing comments: - >>> parser.feed('' +.. doctest:: + + >>> parser.feed('' ... '') - Comment : a comment + Comment : a comment Comment : [if IE 9]>IE-specific content'``):: +correct char (note: these 3 references are all equivalent to ``'>'``): +.. doctest:: + + >>> parser = MyHTMLParser() + >>> parser.feed('>>>') + Data : >>> + + >>> parser = MyHTMLParser(convert_charrefs=False) >>> parser.feed('>>>') Named ent: > Num ent : > @@ -317,18 +336,22 @@ correct char (note: these 3 references are all equivalent to ``'>'``):: Feeding incomplete chunks to :meth:`~HTMLParser.feed` works, but :meth:`~HTMLParser.handle_data` might be called more than once -(unless *convert_charrefs* is set to ``True``):: +(unless *convert_charrefs* is set to ``True``): - >>> for chunk in ['buff', 'ered ', 'text']: +.. doctest:: + + >>> for chunk in ['buff', 'ered', ' text']: ... parser.feed(chunk) ... 
Start tag: span Data : buff Data : ered - Data : text + Data : text End tag : span -Parsing invalid HTML (e.g. unquoted attributes) also works:: +Parsing invalid HTML (e.g. unquoted attributes) also works: + +.. doctest:: >>> parser.feed('

tag soup

') Start tag: p diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 54df4a7e804..063344e0284 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -429,8 +429,7 @@ instantiation, of which this module provides three different variants: ``'Last-Modified:'`` header with the file's modification time. Then follows a blank line signifying the end of the headers, and then the - contents of the file are output. If the file's MIME type starts with - ``text/`` the file is opened in text mode; otherwise binary mode is used. + contents of the file are output. For example usage, see the implementation of the ``test`` function in :source:`Lib/http/server.py`. @@ -459,55 +458,6 @@ such as using different index file names by overriding the class attribute :attr:`index_pages`. -.. class:: CGIHTTPRequestHandler(request, client_address, server) - - This class is used to serve either files or output of CGI scripts from the - current directory and below. Note that mapping HTTP hierarchic structure to - local directory structure is exactly as in :class:`SimpleHTTPRequestHandler`. - - .. note:: - - CGI scripts run by the :class:`CGIHTTPRequestHandler` class cannot execute - redirects (HTTP code 302), because code 200 (script output follows) is - sent prior to execution of the CGI script. This pre-empts the status - code. - - The class will however, run the CGI script, instead of serving it as a file, - if it guesses it to be a CGI script. Only directory-based CGI are used --- - the other common server configuration is to treat special extensions as - denoting CGI scripts. - - The :func:`do_GET` and :func:`do_HEAD` functions are modified to run CGI scripts - and serve the output, instead of serving files, if the request leads to - somewhere below the ``cgi_directories`` path. - - The :class:`CGIHTTPRequestHandler` defines the following data member: - - .. 
attribute:: cgi_directories - - This defaults to ``['/cgi-bin', '/htbin']`` and describes directories to - treat as containing CGI scripts. - - The :class:`CGIHTTPRequestHandler` defines the following method: - - .. method:: do_POST() - - This method serves the ``'POST'`` request type, only allowed for CGI - scripts. Error 501, "Can only POST to CGI scripts", is output when trying - to POST to a non-CGI url. - - Note that CGI scripts will be run with UID of user nobody, for security - reasons. Problems with the CGI script will be translated to error 403. - - .. deprecated-removed:: 3.13 3.15 - - :class:`CGIHTTPRequestHandler` is being removed in 3.15. CGI has not - been considered a good way to do things for well over a decade. This code - has been unmaintained for a while now and sees very little practical use. - Retaining it could lead to further :ref:`security considerations - `. - - .. _http-server-cli: Command-line interface @@ -564,24 +514,6 @@ The following options are accepted: .. versionadded:: 3.11 -.. option:: --cgi - - :class:`CGIHTTPRequestHandler` can be enabled in the command line by passing - the ``--cgi`` option:: - - python -m http.server --cgi - - .. deprecated-removed:: 3.13 3.15 - - :mod:`http.server` command line ``--cgi`` support is being removed - because :class:`CGIHTTPRequestHandler` is being removed. - -.. warning:: - - :class:`CGIHTTPRequestHandler` and the ``--cgi`` command-line option - are not intended for use by untrusted clients and may be vulnerable - to exploitation. Always use within a secure environment. - .. option:: --tls-cert Specifies a TLS certificate chain for HTTPS connections:: diff --git a/Doc/library/importlib.metadata.rst b/Doc/library/importlib.metadata.rst index c84fe9b8786..12014309e26 100644 --- a/Doc/library/importlib.metadata.rst +++ b/Doc/library/importlib.metadata.rst @@ -297,7 +297,7 @@ Distribution files package is not installed in the current Python environment. 
Returns :const:`None` if the distribution is found but the installation - database records reporting the files associated with the distribuion package + database records reporting the files associated with the distribution package are missing. .. class:: PackagePath diff --git a/Doc/library/importlib.resources.abc.rst b/Doc/library/importlib.resources.abc.rst index 7a77466bcba..8253a33f591 100644 --- a/Doc/library/importlib.resources.abc.rst +++ b/Doc/library/importlib.resources.abc.rst @@ -49,44 +49,44 @@ .. method:: open_resource(resource) :abstractmethod: - Returns an opened, :term:`file-like object` for binary reading - of the *resource*. + Returns an opened, :term:`file-like object` for binary reading + of the *resource*. - If the resource cannot be found, :exc:`FileNotFoundError` is - raised. + If the resource cannot be found, :exc:`FileNotFoundError` is + raised. .. method:: resource_path(resource) :abstractmethod: - Returns the file system path to the *resource*. + Returns the file system path to the *resource*. - If the resource does not concretely exist on the file system, - raise :exc:`FileNotFoundError`. + If the resource does not concretely exist on the file system, + raise :exc:`FileNotFoundError`. .. method:: is_resource(name) :abstractmethod: - Returns ``True`` if the named *name* is considered a resource. - :exc:`FileNotFoundError` is raised if *name* does not exist. + Returns ``True`` if the named *name* is considered a resource. + :exc:`FileNotFoundError` is raised if *name* does not exist. .. method:: contents() :abstractmethod: - Returns an :term:`iterable` of strings over the contents of - the package. Do note that it is not required that all names - returned by the iterator be actual resources, e.g. it is - acceptable to return names for which :meth:`is_resource` would - be false. + Returns an :term:`iterable` of strings over the contents of + the package. 
Do note that it is not required that all names + returned by the iterator be actual resources, e.g. it is + acceptable to return names for which :meth:`is_resource` would + be false. - Allowing non-resource names to be returned is to allow for - situations where how a package and its resources are stored - are known a priori and the non-resource names would be useful. - For instance, returning subdirectory names is allowed so that - when it is known that the package and resources are stored on - the file system then those subdirectory names can be used - directly. + Allowing non-resource names to be returned is to allow for + situations where how a package and its resources are stored + are known a priori and the non-resource names would be useful. + For instance, returning subdirectory names is allowed so that + when it is known that the package and resources are stored on + the file system then those subdirectory names can be used + directly. - The abstract method returns an iterable of no items. + The abstract method returns an iterable of no items. .. class:: Traversable diff --git a/Doc/library/io.rst b/Doc/library/io.rst index fcd7afea354..de5cab5aee6 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -528,14 +528,13 @@ I/O Base Classes It inherits from :class:`IOBase`. The main difference with :class:`RawIOBase` is that methods :meth:`read`, - :meth:`readinto` and :meth:`write` will try (respectively) to read as much - input as requested or to consume all given output, at the expense of - making perhaps more than one system call. + :meth:`readinto` and :meth:`write` will try (respectively) to read + as much input as requested or to emit all provided data. - In addition, those methods can raise :exc:`BlockingIOError` if the - underlying raw stream is in non-blocking mode and cannot take or give - enough data; unlike their :class:`RawIOBase` counterparts, they will - never return ``None``. 
+ In addition, if the underlying raw stream is in non-blocking mode and an + operation would block, :meth:`write` will raise :exc:`BlockingIOError` + with :attr:`BlockingIOError.characters_written` set, and :meth:`read` will return + the data read so far or ``None`` if no data is available. Besides, the :meth:`read` method does not have a default implementation that defers to :meth:`readinto`. @@ -568,29 +567,40 @@ I/O Base Classes .. method:: read(size=-1, /) - Read and return up to *size* bytes. If the argument is omitted, ``None``, - or negative, data is read and returned until EOF is reached. An empty - :class:`bytes` object is returned if the stream is already at EOF. + Read and return up to *size* bytes. If the argument is omitted, ``None``, + or negative, read as much as possible. - If the argument is positive, and the underlying raw stream is not - interactive, multiple raw reads may be issued to satisfy the byte count - (unless EOF is reached first). But for interactive raw streams, at most - one raw read will be issued, and a short result does not imply that EOF is - imminent. + Fewer bytes may be returned than requested. An empty :class:`bytes` object + is returned if the stream is already at EOF. More than one read may be + made and calls may be retried if specific errors are encountered; see + :meth:`os.read` and :pep:`475` for more details. Fewer than *size* bytes + being returned does not imply that EOF is imminent. - A :exc:`BlockingIOError` is raised if the underlying raw stream is in - non blocking-mode, and has no data available at the moment. + When reading as much as possible, the default implementation will use + ``raw.readall`` if available (which should implement + :meth:`RawIOBase.readall`), otherwise it will read in a loop until read + returns ``None``, an empty :class:`bytes`, or a non-retryable error. For + most streams this is to EOF, but for non-blocking streams more data may + become available. + + .. 
note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: read1(size=-1, /) - Read and return up to *size* bytes, with at most one call to the - underlying raw stream's :meth:`~RawIOBase.read` (or - :meth:`~RawIOBase.readinto`) method. This can be useful if you are - implementing your own buffering on top of a :class:`BufferedIOBase` - object. + Read and return up to *size* bytes, calling :meth:`~RawIOBase.readinto` + which may retry if :py:const:`~errno.EINTR` is encountered per + :pep:`475`. If *size* is ``-1`` or not provided, the implementation will + choose an arbitrary value for *size*. - If *size* is ``-1`` (the default), an arbitrary number of bytes are - returned (more than zero unless EOF is reached). + .. note:: + + When the underlying raw stream is non-blocking, implementations may + either raise :exc:`BlockingIOError` or return ``None`` if no data is + available. :mod:`io` implementations return ``None``. .. method:: readinto(b, /) @@ -767,34 +777,21 @@ than raw I/O does. .. method:: peek(size=0, /) - Return bytes from the stream without advancing the position. At most one - single read on the raw stream is done to satisfy the call. The number of - bytes returned may be less or more than requested. + Return bytes from the stream without advancing the position. The number of + bytes returned may be less or more than requested. If the underlying raw + stream is non-blocking and the operation would block, returns empty bytes. .. method:: read(size=-1, /) - Read and return *size* bytes, or if *size* is not given or negative, until - EOF or if the read call would block in non-blocking mode. - - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. 
+ In :class:`BufferedReader`, this is the same as :meth:`io.BufferedIOBase.read`. .. method:: read1(size=-1, /) - Read and return up to *size* bytes with only one call on the raw stream. - If at least one byte is buffered, only buffered bytes are returned. - Otherwise, one raw stream read call is made. + In :class:`BufferedReader`, this is the same as :meth:`io.BufferedIOBase.read1`. .. versionchanged:: 3.7 The *size* argument is now optional. - .. note:: - - When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` - may be raised if a read operation cannot be completed immediately. - .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE) A buffered binary stream providing higher-level access to a writeable, non @@ -826,8 +823,8 @@ than raw I/O does. Write the :term:`bytes-like object`, *b*, and return the number of bytes written. When in non-blocking mode, a - :exc:`BlockingIOError` is raised if the buffer needs to be written out but - the raw stream blocks. + :exc:`BlockingIOError` with :attr:`BlockingIOError.characters_written` set + is raised if the buffer needs to be written out but the raw stream blocks. .. class:: BufferedRandom(raw, buffer_size=DEFAULT_BUFFER_SIZE) @@ -894,9 +891,10 @@ Text I/O .. attribute:: buffer - The underlying binary buffer (a :class:`BufferedIOBase` instance) that - :class:`TextIOBase` deals with. This is not part of the - :class:`TextIOBase` API and may not exist in some implementations. + The underlying binary buffer (a :class:`BufferedIOBase` + or :class:`RawIOBase` instance) that :class:`TextIOBase` deals with. + This is not part of the :class:`TextIOBase` API and may not exist + in some implementations. .. method:: detach() @@ -965,7 +963,8 @@ Text I/O :class:`TextIOBase`. *encoding* gives the name of the encoding that the stream will be decoded or - encoded with. It defaults to :func:`locale.getencoding`. + encoded with. In :ref:`UTF-8 Mode <utf8-mode>`, this defaults to UTF-8. 
+ Otherwise, it defaults to :func:`locale.getencoding`. ``encoding="locale"`` can be used to specify the current locale's encoding explicitly. See :ref:`io-text-encoding` for more information. diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 61e413b63cd..00925ae920a 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -1009,6 +1009,12 @@ The following recipes have a more mathematical flavor: .. testcode:: + def multinomial(*counts): + "Number of distinct arrangements of a multiset." + # Counter('abracadabra').values() → 5 2 2 1 1 + # multinomial(5, 2, 2, 1, 1) → 83160 + return prod(map(comb, accumulate(counts), counts)) + def powerset(iterable): "Subsequences of the iterable from shortest to longest." # powerset([1,2,3]) → () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3) @@ -1127,12 +1133,6 @@ The following recipes have a more mathematical flavor: n -= n // prime return n - def multinomial(*counts): - "Number of distinct arrangements of a multiset." - # Counter('abracadabra').values() -> 5 2 1 1 2 - # multinomial(5, 2, 1, 1, 2) → 83160 - return prod(map(comb, accumulate(counts), counts)) - .. doctest:: :hide: @@ -1736,7 +1736,7 @@ The following recipes have a more mathematical flavor: >>> ''.join(it) 'DEF1' - >>> multinomial(5, 2, 1, 1, 2) + >>> multinomial(5, 2, 2, 1, 1) 83160 >>> word = 'coffee' >>> multinomial(*Counter(word).values()) == len(set(permutations(word))) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 26579ec6328..12a5a96a3c5 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,12 +18,17 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. note:: + The term "object" in the context of JSON processing in Python can be + ambiguous. All values in Python are objects. In JSON, an object refers to + any data wrapped in curly braces, similar to a Python dictionary. + .. 
warning:: Be cautious when parsing JSON data from untrusted sources. A malicious JSON string may cause the decoder to consume considerable CPU and memory resources. Limiting the size of data to be parsed is recommended. -:mod:`json` exposes an API familiar to users of the standard library +This module exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. Encoding basic Python object hierarchies:: @@ -60,7 +65,7 @@ Pretty printing:: "6": 7 } -Specializing JSON object encoding:: +Customizing JSON object encoding:: >>> import json >>> def custom_json(obj): @@ -83,7 +88,7 @@ Decoding JSON:: >>> json.load(io) ['streaming API'] -Specializing JSON object decoding:: +Customizing JSON object decoding:: >>> import json >>> def as_complex(dct): @@ -279,7 +284,7 @@ Basic Usage :param object_hook: If set, a function that is called with the result of - any object literal decoded (a :class:`dict`). + any JSON object literal decoded (a :class:`dict`). The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders, @@ -289,7 +294,7 @@ Basic Usage :param object_pairs_hook: If set, a function that is called with the result of - any object literal decoded with an ordered list of pairs. + any JSON object literal decoded with an ordered list of pairs. The return value of this function will be used instead of the :class:`dict`. This feature can be used to implement custom decoders. diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index ffb54591b35..63ef533e82c 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -1148,6 +1148,13 @@ possible, while any potentially slow operations (such as sending an email via .. versionchanged:: 3.5 The ``respect_handler_level`` argument was added. + .. versionchanged:: 3.14 + :class:`QueueListener` can now be used as a context manager via + :keyword:`with`. 
When entering the context, the listener is started. When + exiting the context, the listener is stopped. + :meth:`~contextmanager.__enter__` returns the + :class:`QueueListener` object. + .. method:: dequeue(block) Dequeues a record and return it, optionally blocking. @@ -1179,6 +1186,10 @@ possible, while any potentially slow operations (such as sending an email via This starts up a background thread to monitor the queue for LogRecords to process. + .. versionchanged:: 3.14 + Raises :exc:`RuntimeError` if called and the listener is already + running. + .. method:: stop() Stops the listener. diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst index 72190e97240..4509da58916 100644 --- a/Doc/library/logging.rst +++ b/Doc/library/logging.rst @@ -1342,8 +1342,9 @@ functions. .. function:: basicConfig(**kwargs) - Does basic configuration for the logging system by creating a - :class:`StreamHandler` with a default :class:`Formatter` and adding it to the + Does basic configuration for the logging system by either creating a + :class:`StreamHandler` with a default :class:`Formatter` + or using the given *formatter* instance, and adding it to the root logger. The functions :func:`debug`, :func:`info`, :func:`warning`, :func:`error` and :func:`critical` will call :func:`basicConfig` automatically if no handlers are defined for the root logger. @@ -1428,6 +1429,19 @@ functions. | | which means that it will be treated the | | | same as passing 'errors'. | +--------------+---------------------------------------------+ + | *formatter* | If specified, set this formatter instance | + | | (see :ref:`formatter-objects`) | + | | for all involved handlers. | + | | If not specified, the default is to create | + | | and use an instance of | + | | :class:`logging.Formatter` based on | + | | arguments *format*, *datefmt* and *style*. 
| + | | When *formatter* is specified together with | + | | any of the three arguments *format*, | + | | *datefmt* and *style*, a ``ValueError`` is | + | | raised to signal that these arguments would | + | | lose meaning otherwise. | + +--------------+---------------------------------------------+ .. versionchanged:: 3.2 The *style* argument was added. @@ -1444,6 +1458,9 @@ functions. .. versionchanged:: 3.9 The *encoding* and *errors* arguments were added. + .. versionchanged:: 3.15 + The *formatter* argument was added. + .. function:: shutdown() Informs the logging system to perform an orderly shutdown by flushing and diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 0749367045d..394a462b946 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -144,8 +144,7 @@ Number-theoretic functions .. function:: factorial(n) - Return *n* factorial as an integer. Raises :exc:`ValueError` if *n* is not integral or - is negative. + Return factorial of the nonnegative integer *n*. .. versionchanged:: 3.10 Floats with integral values (like ``5.0``) are no longer accepted. diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst index 5af69455032..13511b16a0e 100644 --- a/Doc/library/mimetypes.rst +++ b/Doc/library/mimetypes.rst @@ -301,13 +301,18 @@ than one MIME-type database; it provides an interface similar to the one of the .. method:: MimeTypes.add_type(type, ext, strict=True) - Add a mapping from the MIME type *type* to the extension *ext*. When the + Add a mapping from the MIME type *type* to the extension *ext*. + Valid extensions start with a '.' or are empty. When the extension is already known, the new type will replace the old one. When the type is already known the extension will be added to the list of known extensions. When *strict* is ``True`` (the default), the mapping will be added to the official MIME types, otherwise to the non-standard ones. + .. 
deprecated-removed:: 3.14 3.16 + Invalid, undotted extensions will raise a + :exc:`ValueError` in Python 3.16. + .. _mimetypes-cli: diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 6ccc0d4aa59..fc3c1134f97 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -670,6 +670,25 @@ The :mod:`multiprocessing` package mostly replicates the API of the .. versionadded:: 3.3 + .. method:: interrupt() + + Terminate the process. Works on POSIX using the :py:const:`~signal.SIGINT` signal. + Behavior on Windows is undefined. + + By default, this terminates the child process by raising :exc:`KeyboardInterrupt`. + This behavior can be altered by setting the respective signal handler in the child + process :func:`signal.signal` for :py:const:`~signal.SIGINT`. + + Note: if the child process catches and discards :exc:`KeyboardInterrupt`, the + process will not be terminated. + + Note: the default behavior will also set :attr:`exitcode` to ``1`` as if an + uncaught exception was raised in the child process. To have a different + :attr:`exitcode` you may simply catch :exc:`KeyboardInterrupt` and call + ``exit(your_code)``. + + .. versionadded:: 3.14 + .. method:: terminate() Terminate the process. On POSIX this is done using the :py:const:`~signal.SIGTERM` signal; @@ -1062,7 +1081,7 @@ Miscellaneous .. function:: freeze_support() Add support for when a program which uses :mod:`multiprocessing` has been - frozen to produce a Windows executable. (Has been tested with **py2exe**, + frozen to produce an executable. (Has been tested with **py2exe**, **PyInstaller** and **cx_Freeze**.) One needs to call this function straight after the ``if __name__ == @@ -1080,10 +1099,10 @@ Miscellaneous If the ``freeze_support()`` line is omitted then trying to run the frozen executable will raise :exc:`RuntimeError`. - Calling ``freeze_support()`` has no effect when invoked on any operating - system other than Windows. 
In addition, if the module is being run - normally by the Python interpreter on Windows (the program has not been - frozen), then ``freeze_support()`` has no effect. + Calling ``freeze_support()`` has no effect when the start method is not + *spawn*. In addition, if the module is being run normally by the Python + interpreter (the program has not been frozen), then ``freeze_support()`` + has no effect. .. function:: get_all_start_methods() @@ -1350,6 +1369,12 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + .. note:: On macOS, this is indistinguishable from :class:`Semaphore` because ``sem_getvalue()`` is not implemented on that platform. @@ -1502,6 +1527,12 @@ object -- see :ref:`multiprocessing-managers`. A solitary difference from its close analog exists: its ``acquire`` method's first argument is named *block*, as is consistent with :meth:`Lock.acquire`. + .. method:: locked() + + Return a boolean indicating whether this object is locked right now. + + .. versionadded:: 3.14 + .. note:: On macOS, ``sem_timedwait`` is unsupported, so calling ``acquire()`` with diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 54a5d3b98e8..1e54cfec609 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2338,6 +2338,7 @@ features: This function can support specifying *src_dir_fd* and/or *dst_dir_fd* to supply :ref:`paths relative to directory descriptors <dir_fd>`, and :ref:`not following symlinks <follow_symlinks>`. + The default value of *follow_symlinks* is ``False`` on Windows. .. 
audit-event:: os.link src,dst,src_dir_fd,dst_dir_fd os.link diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 708a16e6bc8..86351e65dc4 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -871,6 +871,12 @@ conforming to :rfc:`8089`. .. versionadded:: 3.13 + .. versionchanged:: 3.14 + The URL authority is discarded if it matches the local hostname. + Otherwise, if the authority isn't empty or ``localhost``, then on + Windows a UNC path is returned (as before), and on other platforms a + :exc:`ValueError` is raised. + .. method:: Path.as_uri() @@ -1775,9 +1781,12 @@ The following wildcards are supported in patterns for ``?`` Matches one non-separator character. ``[seq]`` - Matches one character in *seq*. + Matches one character in *seq*, where *seq* is a sequence of characters. + Range expressions are supported; for example, ``[a-z]`` matches any lowercase ASCII letter. + Multiple ranges can be combined: ``[a-zA-Z0-9_]`` matches any ASCII letter, digit, or underscore. + ``[!seq]`` - Matches one character not in *seq*. + Matches one character not in *seq*, where *seq* follows the same rules as above. For a literal match, wrap the meta-characters in brackets. For example, ``"[?]"`` matches the character ``"?"``. diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index 8601f390aeb..f4b51664545 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -80,7 +80,7 @@ The debugger's prompt is ``(Pdb)``, which is the indicator that you are in debug You can also invoke :mod:`pdb` from the command line to debug other scripts. For example:: - python -m pdb [-c command] (-m module | pyfile) [args ...] + python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post-mortem debugging (or @@ -104,6 +104,24 @@ useful than quitting the debugger upon program's exit. .. 
versionchanged:: 3.7 Added the ``-m`` option. +.. option:: -p, --pid + + Attach to the process with the specified PID. + + .. versionadded:: 3.14 + + +To attach to a running Python process for remote debugging, use the ``-p`` or +``--pid`` option with the target process's PID:: + + python -m pdb -p 1234 + +.. note:: + + Attaching to a process that is blocked in a system call or waiting for I/O + will only work once the next bytecode instruction is executed or when the + process receives a signal. + Typical usage to execute a statement under control of the debugger is:: >>> import pdb @@ -188,6 +206,21 @@ slightly different way: .. versionadded:: 3.14 The *commands* argument. + +.. awaitablefunction:: set_trace_async(*, header=None, commands=None) + + Async version of :func:`set_trace`. This function should be used inside an + async function with :keyword:`await`. + + .. code-block:: python + + async def f(): + await pdb.set_trace_async() + + :keyword:`await` statements are supported if the debugger is invoked by this function. + + .. versionadded:: 3.14 + .. function:: post_mortem(t=None) Enter post-mortem debugging of the given exception or @@ -228,7 +261,7 @@ The ``run*`` functions and :func:`set_trace` are aliases for instantiating the access further features, you have to do this yourself: .. class:: Pdb(completekey='tab', stdin=None, stdout=None, skip=None, \ - nosigint=False, readrc=True, mode=None, backend=None) + nosigint=False, readrc=True, mode=None, backend=None, colorize=False) :class:`Pdb` is the debugger class. @@ -258,6 +291,9 @@ access further features, you have to do this yourself: is passed, the default backend will be used. See :func:`set_default_backend`. Otherwise the supported backends are ``'settrace'`` and ``'monitoring'``. + The *colorize* argument, if set to ``True``, will enable colorized output in the + debugger, if color is supported. This will highlight source code displayed in pdb.
+ Example call to enable tracing with *skip*:: import pdb; pdb.Pdb(skip=['django.*']).set_trace() @@ -280,6 +316,9 @@ access further features, you have to do this yourself: .. versionadded:: 3.14 Added the *backend* argument. + .. versionadded:: 3.14 + Added the *colorize* argument. + .. versionchanged:: 3.14 Inline breakpoints like :func:`breakpoint` or :func:`pdb.set_trace` will always stop the program at calling frame, ignoring the *skip* pattern (if any). diff --git a/Doc/library/platform.rst b/Doc/library/platform.rst index cfe1e7ba48d..06de152a742 100644 --- a/Doc/library/platform.rst +++ b/Doc/library/platform.rst @@ -17,7 +17,7 @@ section. -Cross Platform +Cross platform -------------- @@ -188,25 +188,7 @@ Cross Platform :attr:`processor` is resolved late instead of immediately. -Java Platform -------------- - - -.. function:: java_ver(release='', vendor='', vminfo=('','',''), osinfo=('','','')) - - Version interface for Jython. - - Returns a tuple ``(release, vendor, vminfo, osinfo)`` with *vminfo* being a - tuple ``(vm_name, vm_release, vm_vendor)`` and *osinfo* being a tuple - ``(os_name, os_version, os_arch)``. Values which cannot be determined are set to - the defaults given as parameters (which all default to ``''``). - - .. deprecated-removed:: 3.13 3.15 - It was largely untested, had a confusing API, - and was only useful for Jython support. - - -Windows Platform +Windows platform ---------------- @@ -240,7 +222,7 @@ Windows Platform .. versionadded:: 3.8 -macOS Platform +macOS platform -------------- .. function:: mac_ver(release='', versioninfo=('','',''), machine='') @@ -252,7 +234,7 @@ macOS Platform Entries which cannot be determined are set to ``''``. All tuple entries are strings. -iOS Platform +iOS platform ------------ .. function:: ios_ver(system='', release='', model='', is_simulator=False) @@ -271,7 +253,7 @@ iOS Platform parameters. -Unix Platforms +Unix platforms -------------- .. 
function:: libc_ver(executable=sys.executable, lib='', version='', chunksize=16384) @@ -287,7 +269,7 @@ Unix Platforms The file is read and scanned in chunks of *chunksize* bytes. -Linux Platforms +Linux platforms --------------- .. function:: freedesktop_os_release() @@ -325,7 +307,7 @@ Linux Platforms .. versionadded:: 3.10 -Android Platform +Android platform ---------------- .. function:: android_ver(release="", api_level=0, manufacturer="", \ @@ -360,6 +342,34 @@ Android Platform .. versionadded:: 3.13 +.. _platform-cli: + +Command-line usage +------------------ + +:mod:`platform` can also be invoked directly using the :option:`-m` +switch of the interpreter:: + + python -m platform [--terse] [--nonaliased] [{nonaliased,terse} ...] + +The following options are accepted: + +.. program:: platform + +.. option:: --terse + + Print terse information about the platform. This is equivalent to + calling :func:`platform.platform` with the *terse* argument set to ``True``. + +.. option:: --nonaliased + + Print platform information without system/OS name aliasing. This is + equivalent to calling :func:`platform.platform` with the *aliased* argument + set to ``False``. + +You can also pass one or more positional arguments (``terse``, ``nonaliased``) +to explicitly control the output format. These behave similarly to their +corresponding options. Miscellaneous ------------- diff --git a/Doc/library/plistlib.rst b/Doc/library/plistlib.rst index c3a133e46a6..415c4b45c4f 100644 --- a/Doc/library/plistlib.rst +++ b/Doc/library/plistlib.rst @@ -147,8 +147,9 @@ The following classes are available: Wraps an :class:`int`. This is used when reading or writing NSKeyedArchiver encoded data, which contains UID (see PList manual). - It has one attribute, :attr:`data`, which can be used to retrieve the int value - of the UID. :attr:`data` must be in the range ``0 <= data < 2**64``. + .. attribute:: data + + Int value of the UID. It must be in the range ``0 <= data < 2**64``. ..
versionadded:: 3.8 diff --git a/Doc/library/re.rst b/Doc/library/re.rst index a91bac53fb4..75ebbf11c8e 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -266,7 +266,7 @@ The special characters are: not a word boundary as outside a set, and numeric escapes such as ``\1`` are always octal escapes, not group references. Special sequences which do not match a single character such as ``\A`` - and ``\Z`` are not allowed. + and ``\z`` are not allowed. .. index:: single: ^ (caret); in regular expressions @@ -661,10 +661,16 @@ character ``'$'``. matches characters which are neither alphanumeric in the current locale nor the underscore. -.. index:: single: \Z; in regular expressions +.. index:: single: \z; in regular expressions + single: \Z; in regular expressions + +``\z`` + Matches only at the end of the string. + + .. versionadded:: 3.14 ``\Z`` - Matches only at the end of the string. + The same as ``\z``. For compatibility with old Python versions. .. index:: single: \a; in regular expressions @@ -985,8 +991,8 @@ Functions That way, separator components are always found at the same relative indices within the result list. - Empty matches for the pattern split the string only when not adjacent - to a previous empty match. + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. .. code:: pycon @@ -1089,9 +1095,12 @@ Functions The optional argument *count* is the maximum number of pattern occurrences to be replaced; *count* must be a non-negative integer. If omitted or zero, all - occurrences will be replaced. Empty matches for the pattern are replaced only - when not adjacent to a previous empty match, so ``sub('x*', '-', 'abxd')`` returns - ``'-a-b--d-'``. + occurrences will be replaced. + + Adjacent empty matches are not possible, but an empty match can occur + immediately after a non-empty match. + As a result, ``sub('x*', '-', 'abxd')`` returns ``'-a-b--d-'`` + instead of ``'-a-b-d-'``. .. 
index:: single: \g; in regular expressions @@ -1122,8 +1131,7 @@ Functions .. versionchanged:: 3.7 Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter now are errors. - Empty matches for the pattern are replaced when adjacent to a previous - non-empty match. + An empty match can occur immediately after a non-empty match. .. versionchanged:: 3.12 Group *id* can only contain ASCII digits. diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index f297bdeec91..f649fce5efc 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -72,6 +72,12 @@ The following functions relate to the init file and user configuration: Execute a readline initialization file. The default filename is the last filename used. This calls :c:func:`!rl_read_init_file` in the underlying library. + It raises an :ref:`auditing event <auditing>` ``open`` with the file name + if given, and :code:`"<readline_init_file>"` otherwise, regardless of + which file the library resolves. + + .. versionchanged:: 3.14 + The auditing event was added. Line buffer @@ -109,14 +115,24 @@ The following functions operate on a history file: Load a readline history file, and append it to the history list. The default filename is :file:`~/.history`. This calls - :c:func:`!read_history` in the underlying library. + :c:func:`!read_history` in the underlying library + and raises an :ref:`auditing event <auditing>` ``open`` with the file + name if given and :code:`"~/.history"` otherwise. + + .. versionchanged:: 3.14 + The auditing event was added. .. function:: write_history_file([filename]) Save the history list to a readline history file, overwriting any existing file. The default filename is :file:`~/.history`. This calls - :c:func:`!write_history` in the underlying library. + :c:func:`!write_history` in the underlying library and raises an + :ref:`auditing event <auditing>` ``open`` with the file name if given and + :code:`"~/.history"` otherwise. + + .. versionchanged:: 3.14 + The auditing event was added. ..
function:: append_history_file(nelements[, filename]) @@ -125,10 +141,14 @@ The following functions operate on a history file: :file:`~/.history`. The file must already exist. This calls :c:func:`!append_history` in the underlying library. This function only exists if Python was compiled for a version of the library - that supports it. + that supports it. It raises an :ref:`auditing event <auditing>` ``open`` + with the file name if given and :code:`"~/.history"` otherwise. .. versionadded:: 3.5 + .. versionchanged:: 3.14 + The auditing event was added. + .. function:: get_history_length() set_history_length(length) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 1819f708890..c78dfe1aafa 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -454,6 +454,10 @@ Directory and files operations :envvar:`PATH` environment variable is read from :data:`os.environ`, falling back to :data:`os.defpath` if it is not set. + If *cmd* contains a directory component, :func:`!which` only checks the + specified path directly and does not search the directories listed in + *path* or in the system's :envvar:`PATH` environment variable. + On Windows, the current directory is prepended to the *path* if *mode* does not include ``os.X_OK``. When the *mode* does include ``os.X_OK``, the Windows API ``NeedCurrentDirectoryForExePathW`` will be consulted to @@ -473,7 +477,7 @@ Directory and files operations This is also applied when *cmd* is a path that contains a directory component:: - >> shutil.which("C:\\Python33\\python") + >>> shutil.which("C:\\Python33\\python") 'C:\\Python33\\python.EXE' .. versionadded:: 3.3 diff --git a/Doc/library/signal.rst b/Doc/library/signal.rst index 17fcb2b3707..b0307d3dea1 100644 --- a/Doc/library/signal.rst +++ b/Doc/library/signal.rst @@ -211,8 +211,8 @@ The variables defined in the :mod:`signal` module are: .. data:: SIGSTKFLT - Stack fault on coprocessor.
The Linux kernel does not raise this signal: it - can only be raised in user space. + Stack fault on coprocessor. The Linux kernel does not raise this signal: it + can only be raised in user space. .. availability:: Linux. @@ -510,10 +510,12 @@ The :mod:`signal` module defines the following functions: .. function:: set_wakeup_fd(fd, *, warn_on_full_buffer=True) - Set the wakeup file descriptor to *fd*. When a signal is received, the - signal number is written as a single byte into the fd. This can be used by - a library to wakeup a poll or select call, allowing the signal to be fully - processed. + Set the wakeup file descriptor to *fd*. When a signal your program has + registered a signal handler for is received, the signal number is written as + a single byte into the fd. If you haven't registered a signal handler for + the signals you care about, then nothing will be written to the wakeup fd. + This can be used by a library to wake up a poll or select call, allowing the + signal to be fully processed. The old wakeup fd is returned (or -1 if file descriptor wakeup was not enabled). If *fd* is -1, file descriptor wakeup is disabled. diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index 1bc7f76b5ba..75fd637045d 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -149,23 +149,37 @@ created. Socket addresses are represented as follows: :const:`BDADDR_LE_RANDOM`. .. versionchanged:: 3.14 - Added ``cid`` and ``bdaddr_type`` fields. + Added ``cid`` and ``bdaddr_type`` fields. - :const:`BTPROTO_RFCOMM` accepts ``(bdaddr, channel)`` where ``bdaddr`` is the Bluetooth address as a string and ``channel`` is an integer. - - :const:`BTPROTO_HCI` accepts ``(device_id,)`` where ``device_id`` is - either an integer or a string with the Bluetooth address of the - interface. (This depends on your OS; NetBSD and DragonFlyBSD expect - a Bluetooth address while everything else expects an integer.)
+ - :const:`BTPROTO_HCI` accepts a format that depends on your OS. + + - On Linux it accepts an integer ``device_id`` or a tuple + ``(device_id, [channel])`` where ``device_id`` + specifies the number of the Bluetooth device, + and ``channel`` is an optional integer specifying the HCI channel + (:const:`HCI_CHANNEL_RAW` by default). + - On FreeBSD, NetBSD and DragonFly BSD it accepts ``bdaddr`` + where ``bdaddr`` is the Bluetooth address as a string. .. versionchanged:: 3.2 NetBSD and DragonFlyBSD support added. - - :const:`BTPROTO_SCO` accepts ``bdaddr`` where ``bdaddr`` is a - :class:`bytes` object containing the Bluetooth address in a - string format. (ex. ``b'12:23:34:45:56:67'``) This protocol is not - supported under FreeBSD. + .. versionchanged:: 3.13.3 + FreeBSD support added. + + .. versionchanged:: 3.14 + Added ``channel`` field. + ``device_id`` not packed in a tuple is now accepted. + + - :const:`BTPROTO_SCO` accepts ``bdaddr`` where ``bdaddr`` is + the Bluetooth address as a string or a :class:`bytes` object. + (ex. ``'12:23:34:45:56:67'`` or ``b'12:23:34:45:56:67'``) + + .. versionchanged:: 3.14 + FreeBSD support added. - :const:`AF_ALG` is a Linux-only socket based interface to Kernel cryptography. An algorithm socket is configured with a tuple of two to four @@ -348,10 +362,10 @@ Exceptions Constants ^^^^^^^^^ - The AF_* and SOCK_* constants are now :class:`AddressFamily` and - :class:`SocketKind` :class:`.IntEnum` collections. +The AF_* and SOCK_* constants are now :class:`AddressFamily` and +:class:`SocketKind` :class:`.IntEnum` collections. - .. versionadded:: 3.4 +.. versionadded:: 3.4 .. data:: AF_UNIX AF_INET @@ -462,8 +476,8 @@ Constants network interface instead of its name. .. versionchanged:: 3.14 - Added missing ``IP_RECVERR``, ``IPV6_RECVERR``, ``IP_RECVTTL``, and - ``IP_RECVORIGDSTADDR`` on Linux. + Added missing ``IP_FREEBIND``, ``IP_RECVERR``, ``IPV6_RECVERR``, + ``IP_RECVTTL``, and ``IP_RECVORIGDSTADDR`` on Linux. .. 
versionchanged:: 3.14 Added support for ``TCP_QUICKACK`` on Windows platforms when available. @@ -484,6 +498,9 @@ Constants .. versionchanged:: 3.11 NetBSD support was added. + .. versionchanged:: 3.14 + Restored missing ``CAN_RAW_ERR_FILTER`` on Linux. + .. data:: CAN_BCM CAN_BCM_* @@ -660,16 +677,74 @@ Constants These constants describe the Bluetooth address type when binding or connecting a :const:`BTPROTO_L2CAP` socket. - .. versionadded:: 3.14 + .. availability:: Linux, FreeBSD + + .. versionadded:: 3.14 + +.. data:: SOL_RFCOMM + SOL_L2CAP + SOL_HCI + SOL_SCO + SOL_BLUETOOTH + + Used in the level argument to the :meth:`~socket.setsockopt` and + :meth:`~socket.getsockopt` methods of Bluetooth socket objects. + + :const:`SOL_BLUETOOTH` is only available on Linux. Other constants + are available if the corresponding protocol is supported. + +.. data:: SO_L2CAP_* + L2CAP_LM + L2CAP_LM_* + SO_RFCOMM_* + RFCOMM_LM_* + SO_SCO_* + SO_BTH_* + BT_* + + Used in the option name and value argument to the :meth:`~socket.setsockopt` + and :meth:`~socket.getsockopt` methods of Bluetooth socket objects. + + :const:`!BT_*` and :const:`L2CAP_LM` are only available on Linux. + :const:`!SO_BTH_*` are only available on Windows. + Other constants may be available on Linux and various BSD platforms. + + .. versionadded:: 3.14 .. data:: HCI_FILTER HCI_TIME_STAMP HCI_DATA_DIR + SO_HCI_EVT_FILTER + SO_HCI_PKT_FILTER - For use with :const:`BTPROTO_HCI`. :const:`HCI_FILTER` is not - available for NetBSD or DragonFlyBSD. :const:`HCI_TIME_STAMP` and - :const:`HCI_DATA_DIR` are not available for FreeBSD, NetBSD, or - DragonFlyBSD. + Option names for use with :const:`BTPROTO_HCI`. + Availability and format of the option values depend on platform. + + .. versionchanged:: 3.14 + Added :const:`!SO_HCI_EVT_FILTER` and :const:`!SO_HCI_PKT_FILTER` + on NetBSD and DragonFly BSD. + Added :const:`!HCI_DATA_DIR` on FreeBSD, NetBSD and DragonFly BSD. + +.. 
data:: HCI_DEV_NONE + + The ``device_id`` value used to create an HCI socket that isn't specific + to a single Bluetooth adapter. + + .. availability:: Linux + + .. versionadded:: 3.14 + +.. data:: HCI_CHANNEL_RAW + HCI_CHANNEL_USER + HCI_CHANNEL_MONITOR + HCI_CHANNEL_CONTROL + HCI_CHANNEL_LOGGING + + Possible values for ``channel`` field in the :const:`BTPROTO_HCI` address. + + .. availability:: Linux + + .. versionadded:: 3.14 .. data:: AF_QIPCRTR @@ -698,9 +773,9 @@ Constants Constant to optimize CPU locality, to be used in conjunction with :data:`SO_REUSEPORT`. - .. versionadded:: 3.11 + .. versionadded:: 3.11 - .. availability:: Linux >= 3.9 + .. availability:: Linux >= 3.9 .. data:: SO_REUSEPORT_LB diff --git a/Doc/library/socketserver.rst b/Doc/library/socketserver.rst index 59cfa136a3b..753f12460b8 100644 --- a/Doc/library/socketserver.rst +++ b/Doc/library/socketserver.rst @@ -24,6 +24,8 @@ There are four basic concrete server classes: :meth:`~BaseServer.server_activate`. The other parameters are passed to the :class:`BaseServer` base class. + .. versionchanged:: next + The default queue size is now ``socket.SOMAXCONN`` for :class:`socketserver.TCPServer`. .. class:: UDPServer(server_address, RequestHandlerClass, bind_and_activate=True) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index c615650b622..2d0f9a740c6 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -259,10 +259,10 @@ Reference Module functions ^^^^^^^^^^^^^^^^ -.. function:: connect(database, timeout=5.0, detect_types=0, \ +.. function:: connect(database, *, timeout=5.0, detect_types=0, \ isolation_level="DEFERRED", check_same_thread=True, \ factory=sqlite3.Connection, cached_statements=128, \ - uri=False, *, \ + uri=False, \ autocommit=sqlite3.LEGACY_TRANSACTION_CONTROL) Open a connection to an SQLite database. @@ -355,11 +355,8 @@ Module functions .. versionchanged:: 3.12 Added the *autocommit* parameter. - .. 
versionchanged:: 3.13 - Positional use of the parameters *timeout*, *detect_types*, - *isolation_level*, *check_same_thread*, *factory*, *cached_statements*, - and *uri* is deprecated. - They will become keyword-only parameters in Python 3.15. + .. versionchanged:: 3.15 + All parameters except *database* are now keyword-only. .. function:: complete_statement(statement) @@ -693,7 +690,7 @@ Connection objects :meth:`~Cursor.executescript` on it with the given *sql_script*. Return the new cursor object. - .. method:: create_function(name, narg, func, *, deterministic=False) + .. method:: create_function(name, narg, func, /, *, deterministic=False) Create or remove a user-defined SQL function. @@ -719,6 +716,9 @@ Connection objects .. versionchanged:: 3.8 Added the *deterministic* parameter. + .. versionchanged:: 3.15 + The first three parameters are now positional-only. + Example: .. doctest:: @@ -733,13 +733,8 @@ Connection objects ('acbd18db4cc2f85cedef654fccc4a4d8',) >>> con.close() - .. versionchanged:: 3.13 - Passing *name*, *narg*, and *func* as keyword arguments is deprecated. - These parameters will become positional-only in Python 3.15. - - - .. method:: create_aggregate(name, n_arg, aggregate_class) + .. method:: create_aggregate(name, n_arg, aggregate_class, /) Create or remove a user-defined SQL aggregate function. @@ -763,6 +758,9 @@ Connection objects Set to ``None`` to remove an existing SQL aggregate function. :type aggregate_class: :term:`class` | None + .. versionchanged:: 3.15 + All three parameters are now positional-only. + Example: .. testcode:: @@ -792,11 +790,6 @@ Connection objects 3 - .. versionchanged:: 3.13 - - Passing *name*, *n_arg*, and *aggregate_class* as keyword arguments is deprecated. - These parameters will become positional-only in Python 3.15. - .. method:: create_window_function(name, num_params, aggregate_class, /) @@ -937,7 +930,7 @@ Connection objects Aborted queries will raise an :exc:`OperationalError`. - .. 
method:: set_authorizer(authorizer_callback) + .. method:: set_authorizer(authorizer_callback, /) Register :term:`callable` *authorizer_callback* to be invoked for each attempt to access a column of a table in the database. @@ -962,12 +955,11 @@ Connection objects .. versionchanged:: 3.11 Added support for disabling the authorizer using ``None``. - .. versionchanged:: 3.13 - Passing *authorizer_callback* as a keyword argument is deprecated. - The parameter will become positional-only in Python 3.15. + .. versionchanged:: 3.15 + The only parameter is now positional-only. - .. method:: set_progress_handler(progress_handler, n) + .. method:: set_progress_handler(progress_handler, /, n) Register :term:`callable` *progress_handler* to be invoked for every *n* instructions of the SQLite virtual machine. This is useful if you want to @@ -981,12 +973,11 @@ Connection objects currently executing query and cause it to raise a :exc:`DatabaseError` exception. - .. versionchanged:: 3.13 - Passing *progress_handler* as a keyword argument is deprecated. - The parameter will become positional-only in Python 3.15. + .. versionchanged:: 3.15 + The first parameter is now positional-only. - .. method:: set_trace_callback(trace_callback) + .. method:: set_trace_callback(trace_callback, /) Register :term:`callable` *trace_callback* to be invoked for each SQL statement that is actually executed by the SQLite backend. @@ -1009,9 +1000,8 @@ Connection objects .. versionadded:: 3.3 - .. versionchanged:: 3.13 - Passing *trace_callback* as a keyword argument is deprecated. - The parameter will become positional-only in Python 3.15. + .. versionchanged:: 3.15 + The first parameter is now positional-only. .. method:: enable_load_extension(enabled, /) diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index c0dcecf737e..ae2e324d0ab 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -934,6 +934,13 @@ Constants .. versionadded:: 3.13 +.. 
data:: HAS_PSK_TLS13 + + Whether the OpenSSL library has built-in support for External PSKs in TLS + 1.3 as described in :rfc:`9258`. + + .. versionadded:: next + .. data:: HAS_PHA Whether the OpenSSL library has built-in support for TLS-PHA. diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index ee07debe1e0..6a4fe494ea7 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -1788,8 +1788,14 @@ expression support in the :mod:`re` module). Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is - returned if *width* is less than or equal to ``len(s)``. + returned if *width* is less than or equal to ``len(s)``. For example:: + >>> 'Python'.center(10) + ' Python ' + >>> 'Python'.center(10, '-') + '--Python--' + >>> 'Python'.center(4) + 'Python' .. method:: str.count(sub[, start[, end]]) @@ -1799,8 +1805,18 @@ expression support in the :mod:`re` module). interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between characters - which is the length of the string plus one. + which is the length of the string plus one. For example:: + >>> 'spam, spam, spam'.count('spam') + 3 + >>> 'spam, spam, spam'.count('spam', 5) + 2 + >>> 'spam, spam, spam'.count('spam', 5, 10) + 1 + >>> 'spam, spam, spam'.count('eggs') + 0 + >>> 'spam, spam, spam'.count('') + 17 .. method:: str.encode(encoding="utf-8", errors="strict") @@ -2012,7 +2028,7 @@ expression support in the :mod:`re` module). .. method:: str.isprintable() - Return true if all characters in the string are printable, false if it + Return ``True`` if all characters in the string are printable, ``False`` if it contains at least one non-printable character. Here "printable" means the character is suitable for :func:`repr` to use in @@ -2269,6 +2285,18 @@ expression support in the :mod:`re` module). 
>>> ' 1 2 3 '.split() ['1', '2', '3'] + If *sep* is not specified or is ``None`` and *maxsplit* is ``0``, only + leading runs of consecutive whitespace are considered. + + For example:: + + >>> "".split(None, 0) + [] + >>> " ".split(None, 0) + [] + >>> " foo ".split(maxsplit=0) + ['foo '] + .. index:: single: universal newlines; str.splitlines method @@ -2450,6 +2478,146 @@ expression support in the :mod:`re` module). '-0042' +.. index:: + single: ! formatted string literal + single: formatted string literals + single: ! f-string + single: f-strings + single: fstring + single: interpolated string literal + single: string; formatted literal + single: string; interpolated literal + single: {} (curly brackets); in formatted string literal + single: ! (exclamation mark); in formatted string literal + single: : (colon); in formatted string literal + single: = (equals); for help in debugging using string literals + +Formatted String Literals (f-strings) +------------------------------------- + +.. versionadded:: 3.6 +.. versionchanged:: 3.7 + The :keyword:`await` and :keyword:`async for` can be used in expressions + within f-strings. +.. versionchanged:: 3.8 + Added the debugging operator (``=``) +.. versionchanged:: 3.12 + Many restrictions on expressions within f-strings have been removed. + Notably, nested strings, comments, and backslashes are now permitted. + +An :dfn:`f-string` (formally a :dfn:`formatted string literal`) is +a string literal that is prefixed with ``f`` or ``F``. +This type of string literal allows embedding arbitrary Python expressions +within *replacement fields*, which are delimited by curly brackets (``{}``). +These expressions are evaluated at runtime, similarly to :meth:`str.format`, +and are converted into regular :class:`str` objects. +For example: + +.. doctest:: + + >>> who = 'nobody' + >>> nationality = 'Spanish' + >>> f'{who.title()} expects the {nationality} Inquisition!' + 'Nobody expects the Spanish Inquisition!' 
+ +It is also possible to use a multi-line f-string: + +.. doctest:: + + >>> f'''This is a string + ... on two lines''' + 'This is a string\non two lines' + +A single opening curly bracket, ``'{'``, marks a *replacement field* that +can contain any Python expression: + +.. doctest:: + + >>> nationality = 'Spanish' + >>> f'The {nationality} Inquisition!' + 'The Spanish Inquisition!' + +To include a literal ``{`` or ``}``, use a double bracket: + +.. doctest:: + + >>> x = 42 + >>> f'{{x}} is {x}' + '{x} is 42' + +Functions can also be used, and :ref:`format specifiers <formatstrings>`: + +.. doctest:: + + >>> from math import sqrt + >>> f'√2 \N{ALMOST EQUAL TO} {sqrt(2):.5f}' + '√2 ≈ 1.41421' + +Any non-string expression is converted using :func:`str`, by default: + +.. doctest:: + + >>> from fractions import Fraction + >>> f'{Fraction(1, 3)}' + '1/3' + +To use an explicit conversion, use the ``!`` (exclamation mark) operator, +followed by any of the valid formats, which are: + +========== ============== +Conversion Meaning +========== ============== +``!a`` :func:`ascii` +``!r`` :func:`repr` +``!s`` :func:`str` +========== ============== + +For example: + +.. doctest:: + + >>> from fractions import Fraction + >>> f'{Fraction(1, 3)!s}' + '1/3' + >>> f'{Fraction(1, 3)!r}' + 'Fraction(1, 3)' + >>> question = '¿Dónde está el Presidente?' + >>> print(f'{question!a}') + '\xbfD\xf3nde est\xe1 el Presidente?' + +While debugging it may be helpful to see both the expression and its value, +by using the equals sign (``=``) after the expression. +This preserves spaces within the brackets, and can be used with a converter. +By default, the debugging operator uses the :func:`repr` (``!r``) conversion. +For example: + +..
doctest:: + + >>> from fractions import Fraction + >>> calculation = Fraction(1, 3) + >>> f'{calculation=}' + 'calculation=Fraction(1, 3)' + >>> f'{calculation = }' + 'calculation = Fraction(1, 3)' + >>> f'{calculation = !s}' + 'calculation = 1/3' + +Once the output has been evaluated, it can be formatted using a +:ref:`format specifier <formatstrings>` following a colon (``':'``). +After the expression has been evaluated, and possibly converted to a string, +the :meth:`!__format__` method of the result is called with the format specifier, +or the empty string if no format specifier is given. +The formatted result is then used as the final value for the replacement field. +For example: + +.. doctest:: + + >>> from fractions import Fraction + >>> f'{Fraction(1, 7):.6f}' + '0.142857' + >>> f'{Fraction(1, 7):_^+10}' + '___+1/7___' + .. _old-string-formatting: @@ -4677,7 +4845,13 @@ can be used interchangeably to index the same dictionary entry. being added is already present, the value from the keyword argument replaces the value from the positional argument. - To illustrate, the following examples all return a dictionary equal to + Providing keyword arguments as in the first example below only works for keys that + are valid Python identifiers. Otherwise, any valid keys can be used. + + Dictionaries compare equal if and only if they have the same ``(key, + value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise + :exc:`TypeError`. To illustrate dictionary creation and equality, + the following examples all return a dictionary equal to ``{"one": 1, "two": 2, "three": 3}``:: >>> a = dict(one=1, two=2, three=3) @@ -4692,6 +4866,27 @@ can be used interchangeably to index the same dictionary entry. Providing keyword arguments as in the first example only works for keys that are valid Python identifiers. Otherwise, any valid keys can be used. + Dictionaries preserve insertion order. Note that updating a key does not + affect the order.
Keys added after deletion are inserted at the end. :: + + >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} + >>> d + {'one': 1, 'two': 2, 'three': 3, 'four': 4} + >>> list(d) + ['one', 'two', 'three', 'four'] + >>> list(d.values()) + [1, 2, 3, 4] + >>> d["one"] = 42 + >>> d + {'one': 42, 'two': 2, 'three': 3, 'four': 4} + >>> del d["two"] + >>> d["two"] = None + >>> d + {'one': 42, 'three': 3, 'four': 4, 'two': None} + + .. versionchanged:: 3.7 + Dictionary order is guaranteed to be insertion order. This behavior was + an implementation detail of CPython from 3.6. These are the operations that dictionaries support (and therefore, custom mapping types should support too): @@ -4862,32 +5057,6 @@ can be used interchangeably to index the same dictionary entry. .. versionadded:: 3.9 - Dictionaries compare equal if and only if they have the same ``(key, - value)`` pairs (regardless of ordering). Order comparisons ('<', '<=', '>=', '>') raise - :exc:`TypeError`. - - Dictionaries preserve insertion order. Note that updating a key does not - affect the order. Keys added after deletion are inserted at the end. :: - - >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} - >>> d - {'one': 1, 'two': 2, 'three': 3, 'four': 4} - >>> list(d) - ['one', 'two', 'three', 'four'] - >>> list(d.values()) - [1, 2, 3, 4] - >>> d["one"] = 42 - >>> d - {'one': 42, 'two': 2, 'three': 3, 'four': 4} - >>> del d["two"] - >>> d["two"] = None - >>> d - {'one': 42, 'three': 3, 'four': 4, 'two': None} - - .. versionchanged:: 3.7 - Dictionary order is guaranteed to be insertion order. This behavior was - an implementation detail of CPython from 3.6. - Dictionaries and dictionary views are reversible. :: >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 5e2f35497cb..c4012483a52 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -131,16 +131,18 @@ implementation as the built-in :meth:`~str.format` method. 
(which can happen if two replacement fields occur consecutively), then *literal_text* will be a zero-length string. If there is no replacement field, then the values of *field_name*, *format_spec* and *conversion* - will be ``None``. + will be ``None``. The value of *field_name* is unmodified and + auto-numbering of non-numbered positional fields is done by :meth:`vformat`. .. method:: get_field(field_name, args, kwargs) - Given *field_name* as returned by :meth:`parse` (see above), convert it to - an object to be formatted. Returns a tuple (obj, used_key). The default - version takes strings of the form defined in :pep:`3101`, such as - "0[name]" or "label.title". *args* and *kwargs* are as passed in to - :meth:`vformat`. The return value *used_key* has the same meaning as the - *key* parameter to :meth:`get_value`. + Given *field_name*, convert it to an object to be formatted. + Auto-numbering of *field_name* returned from :meth:`parse` is done by + :meth:`vformat` before calling this method. Returns a tuple (obj, used_key). + The default version takes strings of the form defined in :pep:`3101`, + such as "0[name]" or "label.title". *args* and *kwargs* are as passed in to + :meth:`vformat`. The return value *used_key* has the same meaning + as the *key* parameter to :meth:`get_value`. .. method:: get_value(key, args, kwargs) @@ -856,7 +858,7 @@ these rules. The methods of :class:`Template` are: .. method:: is_valid() - Returns false if the template has invalid placeholders that will cause + Returns ``False`` if the template has invalid placeholders that will cause :meth:`substitute` to raise :exc:`ValueError`. .. versionadded:: 3.11 diff --git a/Doc/library/struct.rst b/Doc/library/struct.rst index 3ea9e5ba071..17fc479fd0c 100644 --- a/Doc/library/struct.rst +++ b/Doc/library/struct.rst @@ -260,6 +260,10 @@ platform-dependent. 
+--------+--------------------------+--------------------+----------------+------------+ | ``d`` | :c:expr:`double` | float | 8 | \(4) | +--------+--------------------------+--------------------+----------------+------------+ +| ``F`` | :c:expr:`float complex` | complex | 8 | \(10) | ++--------+--------------------------+--------------------+----------------+------------+ +| ``D`` | :c:expr:`double complex` | complex | 16 | \(10) | ++--------+--------------------------+--------------------+----------------+------------+ | ``s`` | :c:expr:`char[]` | bytes | | \(9) | +--------+--------------------------+--------------------+----------------+------------+ | ``p`` | :c:expr:`char[]` | bytes | | \(8) | @@ -267,17 +271,6 @@ platform-dependent. | ``P`` | :c:expr:`void \*` | integer | | \(5) | +--------+--------------------------+--------------------+----------------+------------+ -Additionally, if IEC 60559 compatible complex arithmetic (Annex G of the -C11 standard) is supported, the following format characters are available: - -+--------+--------------------------+--------------------+----------------+------------+ -| Format | C Type | Python type | Standard size | Notes | -+========+==========================+====================+================+============+ -| ``E`` | :c:expr:`float complex` | complex | 8 | \(10) | -+--------+--------------------------+--------------------+----------------+------------+ -| ``C`` | :c:expr:`double complex` | complex | 16 | \(10) | -+--------+--------------------------+--------------------+----------------+------------+ - .. versionchanged:: 3.3 Added support for the ``'n'`` and ``'N'`` formats. @@ -285,7 +278,7 @@ C11 standard) is supported, the following format characters are available: Added support for the ``'e'`` format. .. versionchanged:: 3.14 - Added support for the ``'E'`` and ``'C'`` formats. + Added support for the ``'F'`` and ``'D'`` formats. Notes: @@ -364,9 +357,14 @@ Notes: ``'0c'`` means 0 characters). 
(10) - For the ``'E'`` and ``'C'`` format characters, the packed representation uses + For the ``'F'`` and ``'D'`` format characters, the packed representation uses the IEEE 754 binary32 and binary64 format for components of the complex number, regardless of the floating-point format used by the platform. + Note that complex types (``F`` and ``D``) are available unconditionally, + despite complex types being an optional feature in C. + As specified in the C11 standard, each complex type is represented by a + two-element C array containing, respectively, the real and imaginary parts. + A format character may be preceded by an integral repeat count. For example, the format string ``'4h'`` means exactly the same as ``'hhhh'``. diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 05d09e304b3..028a7861f36 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -1525,6 +1525,24 @@ handling consistency are valid for these functions. Notes ----- +.. _subprocess-timeout-behavior: + +Timeout Behavior +^^^^^^^^^^^^^^^^ + +When using the ``timeout`` parameter in functions like :func:`run`, +:meth:`Popen.wait`, or :meth:`Popen.communicate`, +users should be aware of the following behaviors: + +1. **Process Creation Delay**: The initial process creation itself cannot be interrupted + on many platform APIs. This means that even when specifying a timeout, you are not + guaranteed to see a timeout exception until at least after however long process + creation takes. + +2. **Extremely Small Timeout Values**: Setting very small timeout values (such as a few + milliseconds) may result in almost immediate :exc:`TimeoutExpired` exceptions because + process creation and system scheduling inherently require time. + .. 
_converting-argument-sequence: Converting an argument sequence to a string on Windows diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 918dba9e028..0674074b8c0 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -160,7 +160,7 @@ More events may be added in the future. These events are attributes of the :mod:`!sys.monitoring.events` namespace. Each event is represented as a power-of-2 integer constant. -To define a set of events, simply bitwise or the individual events together. +To define a set of events, simply bitwise OR the individual events together. For example, to specify both :monitoring-event:`PY_RETURN` and :monitoring-event:`PY_START` events, use the expression ``PY_RETURN | PY_START``. diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index dbb5817d340..55e442b20ff 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -130,7 +130,7 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. data:: base_exec_prefix - Equivalent to :data:`exec_prefix`, but refering to the base Python installation. + Equivalent to :data:`exec_prefix`, but referring to the base Python installation. When running under :ref:`sys-path-init-virtual-environments`, :data:`exec_prefix` gets overwritten to the virtual environment prefix. @@ -143,7 +143,7 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. data:: base_prefix - Equivalent to :data:`prefix`, but refering to the base Python installation. + Equivalent to :data:`prefix`, but referring to the base Python installation. When running under :ref:`virtual environment `, :data:`prefix` gets overwritten to the virtual environment prefix. @@ -1282,6 +1282,64 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. versionadded:: 3.5 +.. data:: _jit + + Utilities for observing just-in-time compilation. + + .. 
impl-detail:: + + JIT compilation is an *experimental implementation detail* of CPython. + ``sys._jit`` is not guaranteed to exist or behave the same way in all + Python implementations, versions, or build configurations. + + .. versionadded:: 3.14 + + .. function:: _jit.is_available() + + Return ``True`` if the current Python executable supports JIT compilation, + and ``False`` otherwise. This can be controlled by building CPython with + the ``--experimental-jit`` option on Windows, and the + :option:`--enable-experimental-jit` option on all other platforms. + + .. function:: _jit.is_enabled() + + Return ``True`` if JIT compilation is enabled for the current Python + process (implies :func:`sys._jit.is_available`), and ``False`` otherwise. + If JIT compilation is available, this can be controlled by setting the + :envvar:`PYTHON_JIT` environment variable to ``0`` (disabled) or ``1`` + (enabled) at interpreter startup. + + .. function:: _jit.is_active() + + Return ``True`` if the topmost Python frame is currently executing JIT + code (implies :func:`sys._jit.is_enabled`), and ``False`` otherwise. + + .. note:: + + This function is intended for testing and debugging the JIT itself. + It should be avoided for any other purpose. + + .. note:: + + Due to the nature of tracing JIT compilers, repeated calls to this + function may give surprising results. For example, branching on its + return value will likely lead to unexpected behavior (if doing so + causes JIT code to be entered or exited): + + .. code-block:: pycon + + >>> for warmup in range(BIG_NUMBER): + ... # This line is "hot", and is eventually JIT-compiled: + ... if sys._jit.is_active(): + ... # This line is "cold", and is run in the interpreter: + ... assert sys._jit.is_active() + ... + Traceback (most recent call last): + File "", line 5, in + assert sys._jit.is_active() + ~~~~~~~~~~~~~~~~~~^^ + AssertionError + .. 
data:: last_exc This variable is not always defined; it is set to the exception instance diff --git a/Doc/library/sys_path_init.rst b/Doc/library/sys_path_init.rst index 18fe32d9c7f..a37bb59e7ce 100644 --- a/Doc/library/sys_path_init.rst +++ b/Doc/library/sys_path_init.rst @@ -97,7 +97,7 @@ Please refer to :mod:`site`'s .. note:: There are other ways how "virtual environments" could be implemented, this - documentation referes implementations based on the ``pyvenv.cfg`` mechanism, + documentation refers to implementations based on the ``pyvenv.cfg`` mechanism, such as :mod:`venv`. Most virtual environment implementations follow the model set by :mod:`venv`, but there may be exotic implementations that diverge from it. diff --git a/Doc/library/sysconfig.rst b/Doc/library/sysconfig.rst index 9f018f9c8f0..684d14a74c4 100644 --- a/Doc/library/sysconfig.rst +++ b/Doc/library/sysconfig.rst @@ -429,9 +429,10 @@ Other functions Return the path of :file:`Makefile`. .. _sysconfig-cli: +.. _using-sysconfig-as-a-script: -Using :mod:`sysconfig` as a script ----------------------------------- +Command-line usage +------------------ You can use :mod:`sysconfig` as a script with Python's *-m* option: diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index c9d69cf5094..f9cb5495e60 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -112,7 +112,7 @@ Some facts and figures: ``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument *compresslevel* (default ``9``) to specify the compression level of the file. - For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the + For modes ``'w:xz'``, ``'x:xz'`` and ``'w|xz'``, :func:`tarfile.open` accepts the keyword argument *preset* to specify the compression level of the file. For special purposes, there is a second format for *mode*: @@ -167,6 +167,9 @@ Some facts and figures: .. versionchanged:: 3.12 The *compresslevel* keyword argument also works for streams. + .. 
versionchanged:: 3.14 + The *preset* keyword argument also works for streams. + .. class:: TarFile :noindex: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 8c06a2b04bf..0aae14c15a6 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -853,7 +853,7 @@ The :mod:`test.support` module defines the following functions: .. function:: linked_with_musl() - Return ``False`` if there is no evidence the interperter was compiled with + Return ``False`` if there is no evidence the interpreter was compiled with ``musl``, otherwise return a version triple, either ``(0, 0, 0)`` if the version is unknown, or the actual version if it is known. Intended for use in ``skip`` decorators. ``emscripten`` and ``wasi`` are assumed to be diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index 989c7624448..7edcdcabdce 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -11,6 +11,52 @@ This module constructs higher-level threading interfaces on top of the lower level :mod:`_thread` module. +.. include:: ../includes/wasm-notavail.rst + +Introduction +------------ + +The :mod:`!threading` module provides a way to run multiple `threads +`_ (smaller +units of a process) concurrently within a single process. It allows for the +creation and management of threads, making it possible to execute tasks in +parallel, sharing memory space. Threads are particularly useful when tasks are +I/O bound, such as file operations or making network requests, +where much of the time is spent waiting for external resources. + +A typical use case for :mod:`!threading` includes managing a pool of worker +threads that can process multiple tasks concurrently. 
Here's a basic example of +creating and starting threads using :class:`~threading.Thread`:: + + import threading + import time + + def crawl(link, delay=3): + print(f"crawl started for {link}") + time.sleep(delay) # Blocking I/O (simulating a network request) + print(f"crawl ended for {link}") + + links = [ + "https://python.org", + "https://docs.python.org", + "https://peps.python.org", + ] + + # Start threads for each link + threads = [] + for link in links: + # Using `args` to pass positional arguments and `kwargs` for keyword arguments + t = threading.Thread(target=crawl, args=(link,), kwargs={"delay": 2}) + threads.append(t) + + # Start each thread + for t in threads: + t.start() + + # Wait for all threads to finish + for t in threads: + t.join() + .. versionchanged:: 3.7 This module used to be optional, it is now always available. @@ -45,7 +91,25 @@ level :mod:`_thread` module. However, threading is still an appropriate model if you want to run multiple I/O-bound tasks simultaneously. -.. include:: ../includes/wasm-notavail.rst +GIL and performance considerations +---------------------------------- + +Unlike the :mod:`multiprocessing` module, which uses separate processes to +bypass the :term:`global interpreter lock` (GIL), the threading module operates +within a single process, meaning that all threads share the same memory space. +However, the GIL limits the performance gains of threading when it comes to +CPU-bound tasks, as only one thread can execute Python bytecode at a time. +Despite this, threads remain a useful tool for achieving concurrency in many +scenarios. + +As of Python 3.13, experimental :term:`free-threaded ` builds +can disable the GIL, enabling true parallel execution of threads, but this +feature is not available by default (see :pep:`703`). + +.. TODO: At some point this feature will become available by default. 
+ +Reference +--------- This module defines the following functions: @@ -62,7 +126,7 @@ This module defines the following functions: Return the current :class:`Thread` object, corresponding to the caller's thread of control. If the caller's thread of control was not created through the - :mod:`threading` module, a dummy thread object with limited functionality is + :mod:`!threading` module, a dummy thread object with limited functionality is returned. The function ``currentThread`` is a deprecated alias for this function. @@ -157,13 +221,13 @@ This module defines the following functions: .. index:: single: trace function - Set a trace function for all threads started from the :mod:`threading` module. + Set a trace function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.settrace` for each thread, before its :meth:`~Thread.run` method is called. .. function:: settrace_all_threads(func) - Set a trace function for all threads started from the :mod:`threading` module + Set a trace function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. The *func* will be passed to :func:`sys.settrace` for each thread, before its @@ -186,13 +250,13 @@ This module defines the following functions: .. index:: single: profile function - Set a profile function for all threads started from the :mod:`threading` module. + Set a profile function for all threads started from the :mod:`!threading` module. The *func* will be passed to :func:`sys.setprofile` for each thread, before its :meth:`~Thread.run` method is called. .. function:: setprofile_all_threads(func) - Set a profile function for all threads started from the :mod:`threading` module + Set a profile function for all threads started from the :mod:`!threading` module and all Python threads that are currently executing. 
The *func* will be passed to :func:`sys.setprofile` for each thread, before its @@ -257,31 +321,140 @@ when implemented, are mapped to module-level functions. All of the methods described below are executed atomically. -Thread-Local Data ------------------ +Thread-local data +^^^^^^^^^^^^^^^^^ -Thread-local data is data whose values are thread specific. To manage -thread-local data, just create an instance of :class:`local` (or a -subclass) and store attributes on it:: +Thread-local data is data whose values are thread specific. If you +have data that you want to be local to a thread, create a +:class:`local` object and use its attributes:: - mydata = threading.local() - mydata.x = 1 + >>> mydata = local() + >>> mydata.number = 42 + >>> mydata.number + 42 -The instance's values will be different for separate threads. +You can also access the :class:`local`-object's dictionary:: + + >>> mydata.__dict__ + {'number': 42} + >>> mydata.__dict__.setdefault('widgets', []) + [] + >>> mydata.widgets + [] + +If we access the data in a different thread:: + + >>> log = [] + >>> def f(): + ... items = sorted(mydata.__dict__.items()) + ... log.append(items) + ... mydata.number = 11 + ... log.append(mydata.number) + + >>> import threading + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[], 11] + +we get different data. Furthermore, changes made in the other thread +don't affect data seen in this thread:: + + >>> mydata.number + 42 + +Of course, values you get from a :class:`local` object, including their +:attr:`~object.__dict__` attribute, are for whatever thread was current +at the time the attribute was read. For that reason, you generally +don't want to save these values across threads, as they apply only to +the thread they came from. + +You can create custom :class:`local` objects by subclassing the +:class:`local` class:: + + >>> class MyLocal(local): + ... number = 2 + ... def __init__(self, /, **kw): + ... 
self.__dict__.update(kw) + ... def squared(self): + ... return self.number ** 2 + +This can be useful to support default values, methods and +initialization. Note that if you define an :py:meth:`~object.__init__` +method, it will be called each time the :class:`local` object is used +in a separate thread. This is necessary to initialize each thread's +dictionary. + +Now if we create a :class:`local` object:: + + >>> mydata = MyLocal(color='red') + +we have a default number:: + + >>> mydata.number + 2 + +an initial color:: + + >>> mydata.color + 'red' + >>> del mydata.color + +And a method that operates on the data:: + + >>> mydata.squared() + 4 + +As before, we can access the data in a separate thread:: + + >>> log = [] + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + >>> log + [[('color', 'red')], 11] + +without affecting this thread's data:: + + >>> mydata.number + 2 + >>> mydata.color + Traceback (most recent call last): + ... + AttributeError: 'MyLocal' object has no attribute 'color' + +Note that subclasses can define :term:`__slots__`, but they are not +thread local. They are shared across threads:: + + >>> class MyLocal(local): + ... __slots__ = 'number' + + >>> mydata = MyLocal() + >>> mydata.number = 42 + >>> mydata.color = 'red' + +So, the separate thread:: + + >>> thread = threading.Thread(target=f) + >>> thread.start() + >>> thread.join() + +affects what we see:: + + >>> mydata.number + 11 .. class:: local() A class that represents thread-local data. - For more details and extensive examples, see the documentation string of the - :mod:`!_threading_local` module: :source:`Lib/_threading_local.py`. - .. _thread-objects: -Thread Objects --------------- +Thread objects +^^^^^^^^^^^^^^ The :class:`Thread` class represents an activity that is run in a separate thread of control. There are two ways to specify the activity: by passing a @@ -448,6 +621,14 @@ since it is impossible to detect the termination of alien threads. 
an error to :meth:`~Thread.join` a thread before it has been started and attempts to do so raise the same exception. + If an attempt is made to join a running daemonic thread in late stages + of :term:`Python finalization `, :meth:`!join` + raises a :exc:`PythonFinalizationError`. + + .. versionchanged:: 3.14 + + May raise :exc:`PythonFinalizationError`. + .. attribute:: name A string used for identification purposes only. It has no semantics. @@ -528,8 +709,8 @@ since it is impossible to detect the termination of alien threads. .. _lock-objects: -Lock Objects ------------- +Lock objects +^^^^^^^^^^^^ A primitive lock is a synchronization primitive that is not owned by a particular thread when locked. In Python, it is currently the lowest level @@ -621,8 +802,8 @@ All methods are executed atomically. .. _rlock-objects: -RLock Objects -------------- +RLock objects +^^^^^^^^^^^^^ A reentrant lock is a synchronization primitive that may be acquired multiple times by the same thread. Internally, it uses the concepts of "owning thread" @@ -731,8 +912,8 @@ call release as many times the lock has been acquired can lead to deadlock. .. _condition-objects: -Condition Objects ------------------ +Condition objects +^^^^^^^^^^^^^^^^^ A condition variable is always associated with some kind of lock; this can be passed in or one will be created by default. Passing one in is useful when @@ -909,8 +1090,8 @@ item to the buffer only needs to wake up one consumer thread. .. _semaphore-objects: -Semaphore Objects ------------------ +Semaphore objects +^^^^^^^^^^^^^^^^^ This is one of the oldest synchronization primitives in the history of computer science, invented by the early Dutch computer scientist Edsger W. Dijkstra (he @@ -990,7 +1171,7 @@ Semaphores also support the :ref:`context management protocol `. .. 
_semaphore-examples: -:class:`Semaphore` Example +:class:`Semaphore` example ^^^^^^^^^^^^^^^^^^^^^^^^^^ Semaphores are often used to guard resources with limited capacity, for example, @@ -1018,8 +1199,8 @@ causes the semaphore to be released more than it's acquired will go undetected. .. _event-objects: -Event Objects -------------- +Event objects +^^^^^^^^^^^^^ This is one of the simplest mechanisms for communication between threads: one thread signals an event and other threads wait for it. @@ -1075,8 +1256,8 @@ method. The :meth:`~Event.wait` method blocks until the flag is true. .. _timer-objects: -Timer Objects -------------- +Timer objects +^^^^^^^^^^^^^ This class represents an action that should be run only after a certain amount of time has passed --- a timer. :class:`Timer` is a subclass of :class:`Thread` @@ -1113,8 +1294,8 @@ For example:: only work if the timer is still in its waiting stage. -Barrier Objects ---------------- +Barrier objects +^^^^^^^^^^^^^^^ .. versionadded:: 3.2 diff --git a/Doc/library/token.rst b/Doc/library/token.rst index 24455b1ef77..1f92b5df430 100644 --- a/Doc/library/token.rst +++ b/Doc/library/token.rst @@ -131,6 +131,41 @@ The token constants are: The token string contains the closing quote(s). +.. data:: TSTRING_START + + Token value used to indicate the beginning of a template string literal. + + .. impl-detail:: + + The token string includes the prefix and the opening quote(s), but none + of the contents of the literal. + + .. versionadded:: 3.14 + +.. data:: TSTRING_MIDDLE + + Token value used for literal text inside a template string literal + including format specifications. + + .. impl-detail:: + + Replacement fields (that is, the non-literal parts of t-strings) use + the same tokens as other expressions, and are delimited by + :data:`LBRACE`, :data:`RBRACE`, :data:`EXCLAMATION` and :data:`COLON` + tokens. + + .. versionadded:: 3.14 + +.. 
data:: TSTRING_END + + Token value used to indicate the end of a template string literal. + + .. impl-detail:: + + The token string contains the closing quote(s). + + .. versionadded:: 3.14 + .. data:: ENDMARKER Token value that indicates the end of input. diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index 0bff970fa77..fea6b57edf0 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -1,6 +1,6 @@ -================================= -:mod:`turtle` --- Turtle graphics -================================= +================================== +:mod:`!turtle` --- Turtle graphics +================================== .. module:: turtle :synopsis: An educational framework for simple graphics applications diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index 84f77e8f206..54cc3ea3311 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -1,6 +1,6 @@ -======================================== -:mod:`typing` --- Support for type hints -======================================== +========================================= +:mod:`!typing` --- Support for type hints +========================================= .. testsetup:: * @@ -665,7 +665,7 @@ through a simple assignment:: User-defined generics for parameter expressions are also supported via parameter specification variables in the form ``[**P]``. The behavior is consistent with type variables' described above as parameter specification variables are -treated by the typing module as a specialized type variable. The one exception +treated by the :mod:`!typing` module as a specialized type variable. The one exception to this is that a list of types can be used to substitute a :class:`ParamSpec`:: >>> class Z[T, **P]: ... # T is a TypeVar; P is a ParamSpec @@ -706,7 +706,7 @@ are intended primarily for static type checking. A user-defined generic class can have ABCs as base classes without a metaclass conflict. Generic metaclasses are not supported. 
The outcome of parameterizing -generics is cached, and most types in the typing module are :term:`hashable` and +generics is cached, and most types in the :mod:`!typing` module are :term:`hashable` and comparable for equality. @@ -1098,6 +1098,12 @@ These can be used as types in annotations. They all support subscription using Union[Union[int, str], float] == Union[int, str, float] + However, this does not apply to unions referenced through a type + alias, to avoid forcing evaluation of the underlying :class:`TypeAliasType`:: + + type A = Union[int, str] + Union[A, float] != Union[int, str, float] + * Unions of a single argument vanish, e.g.:: Union[int] == int # The constructor actually returns int @@ -1230,6 +1236,32 @@ These can be used as types in annotations. They all support subscription using is allowed as type argument to ``Literal[...]``, but type checkers may impose restrictions. See :pep:`586` for more details about literal types. + Additional details: + + * The arguments must be literal values and there must be at least one. + + * Nested ``Literal`` types are flattened, e.g.:: + + assert Literal[Literal[1, 2], 3] == Literal[1, 2, 3] + + However, this does not apply to ``Literal`` types referenced through a type + alias, to avoid forcing evaluation of the underlying :class:`TypeAliasType`:: + + type A = Literal[1, 2] + assert Literal[A, 3] != Literal[1, 2, 3] + + * Redundant arguments are skipped, e.g.:: + + assert Literal[1, 2, 1] == Literal[1, 2] + + * When comparing literals, the argument order is ignored, e.g.:: + + assert Literal[1, 2] == Literal[2, 1] + + * You cannot subclass or instantiate a ``Literal``. + + * You cannot write ``Literal[X][Y]``. + .. versionadded:: 3.8 .. versionchanged:: 3.9.1 @@ -1400,6 +1432,14 @@ These can be used as types in annotations. 
They all support subscription using int, ValueRange(3, 10), ctype("char") ] + However, this does not apply to ``Annotated`` types referenced through a type + alias, to avoid forcing evaluation of the underlying :class:`TypeAliasType`:: + + type From3To10[T] = Annotated[T, ValueRange(3, 10)] + assert Annotated[From3To10[int], ctype("char")] != Annotated[ + int, ValueRange(3, 10), ctype("char") + ] + Duplicated metadata elements are not removed:: assert Annotated[int, ValueRange(3, 10)] != Annotated[ @@ -2455,7 +2495,8 @@ types. See :pep:`544` for more details. Protocol classes decorated with :func:`runtime_checkable` (described later) act as simple-minded runtime protocols that check only the presence of given attributes, ignoring their - type signatures. + type signatures. Protocol classes without this decorator cannot be used + as the second argument to :func:`isinstance` or :func:`issubclass`. Protocol classes can be generic, for example:: @@ -2479,8 +2520,7 @@ types. Mark a protocol class as a runtime protocol. Such a protocol can be used with :func:`isinstance` and :func:`issubclass`. - This raises :exc:`TypeError` when applied to a non-protocol class. This - allows a simple-minded structural check, very similar to "one trick ponies" + This allows a simple-minded structural check, very similar to "one trick ponies" in :mod:`collections.abc` such as :class:`~collections.abc.Iterable`. For example:: @runtime_checkable @@ -2496,6 +2536,8 @@ types. import threading assert isinstance(threading.Thread(name='Bob'), Named) + This decorator raises :exc:`TypeError` when applied to a non-protocol class. + .. note:: :func:`!runtime_checkable` will check only the presence of the required @@ -2785,7 +2827,7 @@ types. Protocols --------- -The following protocols are provided by the typing module. All are decorated +The following protocols are provided by the :mod:`!typing` module. All are decorated with :func:`@runtime_checkable `. .. 
class:: SupportsAbs @@ -3529,7 +3571,7 @@ Deprecated aliases ------------------ This module defines several deprecated aliases to pre-existing -standard library classes. These were originally included in the typing +standard library classes. These were originally included in the :mod:`!typing` module in order to support parameterizing these generic classes using ``[]``. However, the aliases became redundant in Python 3.9 when the corresponding pre-existing classes were enhanced to support ``[]`` (see @@ -3542,7 +3584,7 @@ interpreter for these aliases. If at some point it is decided to remove these deprecated aliases, a deprecation warning will be issued by the interpreter for at least two releases -prior to removal. The aliases are guaranteed to remain in the typing module +prior to removal. The aliases are guaranteed to remain in the :mod:`!typing` module without deprecation warnings until at least Python 3.14. Type checkers are encouraged to flag uses of the deprecated types if the diff --git a/Doc/library/unittest.mock.rst b/Doc/library/unittest.mock.rst index 27c169dde72..091562cc9ae 100644 --- a/Doc/library/unittest.mock.rst +++ b/Doc/library/unittest.mock.rst @@ -2654,9 +2654,9 @@ with any methods on the mock: .. code-block:: pycon - >>> mock.has_data() + >>> mock.header_items() - >>> mock.has_data.assret_called_with() # Intentional typo! + >>> mock.header_items.assret_called_with() # Intentional typo! Auto-speccing solves this problem. You can either pass ``autospec=True`` to :func:`patch` / :func:`patch.object` or use the :func:`create_autospec` function to create a diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 61022fe052c..dcdda1719bf 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -109,7 +109,7 @@ Here is a short script to test three string methods:: unittest.main() -A testcase is created by subclassing :class:`unittest.TestCase`. The three +A test case is created by subclassing :class:`unittest.TestCase`. 
The three individual tests are defined with methods whose names start with the letters ``test``. This naming convention informs the test runner about which methods represent tests. diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 8b54e10713e..58bd111b5cc 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -146,39 +146,57 @@ The :mod:`urllib.request` module defines the following functions: attribute to modify its position in the handlers list. -.. function:: pathname2url(path) +.. function:: pathname2url(path, *, add_scheme=False) Convert the given local path to a ``file:`` URL. This function uses - :func:`~urllib.parse.quote` function to encode the path. For historical - reasons, the return value omits the ``file:`` scheme prefix. This example - shows the function being used on Windows:: + :func:`~urllib.parse.quote` function to encode the path. + + If *add_scheme* is false (the default), the return value omits the + ``file:`` scheme prefix. Set *add_scheme* to true to return a complete URL. + + This example shows the function being used on Windows:: >>> from urllib.request import pathname2url >>> path = 'C:\\Program Files' - >>> 'file:' + pathname2url(path) + >>> pathname2url(path, add_scheme=True) 'file:///C:/Program%20Files' + .. versionchanged:: 3.14 + Windows drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an + :exc:`OSError` exception to be raised on Windows. + .. versionchanged:: 3.14 Paths beginning with a slash are converted to URLs with authority sections. For example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``. .. versionchanged:: 3.14 - Windows drive letters are no longer converted to uppercase, and ``:`` - characters not following a drive letter no longer cause an - :exc:`OSError` exception to be raised on Windows. + The *add_scheme* parameter was added. -.. function:: url2pathname(url) +.. 
function:: url2pathname(url, *, require_scheme=False, resolve_host=False) Convert the given ``file:`` URL to a local path. This function uses - :func:`~urllib.parse.unquote` to decode the URL. For historical reasons, - the given value *must* omit the ``file:`` scheme prefix. This example shows - the function being used on Windows:: + :func:`~urllib.parse.unquote` to decode the URL. + + If *require_scheme* is false (the default), the given value should omit a + ``file:`` scheme prefix. If *require_scheme* is set to true, the given + value should include the prefix; a :exc:`~urllib.error.URLError` is raised + if it doesn't. + + The URL authority is discarded if it is empty, ``localhost``, or the local + hostname. Otherwise, if *resolve_host* is set to true, the authority is + resolved using :func:`socket.gethostbyname` and discarded if it matches a + local IP address (as per :rfc:`RFC 8089 §3 <8089#section-3>`). If the + authority is still unhandled, then on Windows a UNC path is returned, and + on other platforms a :exc:`~urllib.error.URLError` is raised. + + This example shows the function being used on Windows:: >>> from urllib.request import url2pathname >>> url = 'file:///C:/Program%20Files' - >>> url2pathname(url.removeprefix('file:')) + >>> url2pathname(url, require_scheme=True) 'C:\\Program Files' .. versionchanged:: 3.14 @@ -186,6 +204,15 @@ The :mod:`urllib.request` module defines the following functions: characters not following a drive letter no longer cause an :exc:`OSError` exception to be raised on Windows. + .. versionchanged:: 3.14 + The URL authority is discarded if it matches the local hostname. + Otherwise, if the authority isn't empty or ``localhost``, then on + Windows a UNC path is returned (as before), and on other platforms a + :exc:`~urllib.error.URLError` is raised. + + .. versionchanged:: 3.14 + The *require_scheme* and *resolve_host* parameters were added. + .. function:: getproxies() @@ -1094,7 +1121,7 @@ HTTPHandler Objects .. 
method:: HTTPHandler.http_open(req) Send an HTTP request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _https-handler-objects: @@ -1106,7 +1133,7 @@ HTTPSHandler Objects .. method:: HTTPSHandler.https_open(req) Send an HTTPS request, which can be either GET or POST, depending on - ``req.has_data()``. + ``req.data``. .. _file-handler-objects: diff --git a/Doc/library/wave.rst b/Doc/library/wave.rst index 36c2bde87fb..a3f5bfd5e2f 100644 --- a/Doc/library/wave.rst +++ b/Doc/library/wave.rst @@ -123,26 +123,6 @@ Wave_read Objects Rewind the file pointer to the beginning of the audio stream. - The following two methods are defined for compatibility with the old :mod:`!aifc` - module, and don't do anything interesting. - - - .. method:: getmarkers() - - Returns ``None``. - - .. deprecated-removed:: 3.13 3.15 - The method only existed for compatibility with the :mod:`!aifc` module - which has been removed in Python 3.13. - - - .. method:: getmark(id) - - Raise an error. - - .. deprecated-removed:: 3.13 3.15 - The method only existed for compatibility with the :mod:`!aifc` module - which has been removed in Python 3.13. The following two methods define a term "position" which is compatible between them, and is otherwise implementation dependent. diff --git a/Doc/library/webbrowser.rst b/Doc/library/webbrowser.rst index d33e7344761..fd6abc70261 100644 --- a/Doc/library/webbrowser.rst +++ b/Doc/library/webbrowser.rst @@ -24,8 +24,17 @@ If the environment variable :envvar:`BROWSER` exists, it is interpreted as the :data:`os.pathsep`-separated list of browsers to try ahead of the platform defaults. When the value of a list part contains the string ``%s``, then it is interpreted as a literal browser command line to be used with the argument URL -substituted for ``%s``; if the part does not contain ``%s``, it is simply -interpreted as the name of the browser to launch. 
[1]_ +substituted for ``%s``; if the value is a single word that refers to one of the +already registered browsers, this browser is added to the front of the search list; +if the part does not contain ``%s``, it is simply interpreted as the name of the +browser to launch. [1]_ + +.. versionchanged:: 3.14 + + The :envvar:`BROWSER` variable can now also be used to reorder the list of + platform defaults. This is particularly useful on macOS, where the platform + defaults do not refer to command-line tools on :envvar:`PATH`. + For non-Unix platforms, or when a remote browser is available on Unix, the controlling process will not wait for the user to finish with the browser, but @@ -226,8 +235,8 @@ Here are some simple examples:: Browser Controller Objects -------------------------- -Browser controllers provide these methods which parallel three of the -module-level convenience functions: +Browser controllers provide the :attr:`~controller.name` attribute, +and the following three methods which parallel module-level convenience functions: .. attribute:: controller.name diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index db0537f8ccc..75ead3c4cb1 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -353,3 +353,8 @@ the following constants: http://www.zlib.net/manual.html The zlib manual explains the semantics and usage of the library's many functions. + + In case gzip (de)compression is a bottleneck, the `python-isal`_ + package speeds up (de)compression with a mostly compatible API. + + .. 
_python-isal: https://github.com/pycompression/python-isal diff --git a/Doc/license.rst b/Doc/license.rst index 90783e3e31a..480414bb84c 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -1132,3 +1132,40 @@ The file is distributed under the 2-Clause BSD License:: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +Zstandard bindings +------------------ + +Zstandard bindings in :file:`Modules/_zstd` and :file:`Lib/compression/zstd` +are based on code from the +`pyzstd library `_, copyright Ma Lin and +contributors. The pyzstd code is distributed under the 3-Clause BSD License:: + + Copyright (c) 2020-present, Ma Lin and contributors. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/Doc/reference/compound_stmts.rst b/Doc/reference/compound_stmts.rst index f36ed3e122f..e95fa3a6424 100644 --- a/Doc/reference/compound_stmts.rst +++ b/Doc/reference/compound_stmts.rst @@ -154,15 +154,15 @@ The :keyword:`for` statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: .. productionlist:: python-grammar - for_stmt: "for" `target_list` "in" `starred_list` ":" `suite` + for_stmt: "for" `target_list` "in" `starred_expression_list` ":" `suite` : ["else" ":" `suite`] -The ``starred_list`` expression is evaluated once; it should yield an -:term:`iterable` object. An :term:`iterator` is created for that iterable. -The first item provided -by the iterator is then assigned to the target list using the standard -rules for assignments (see :ref:`assignment`), and the suite is executed. This -repeats for each item provided by the iterator. When the iterator is exhausted, +The :token:`~python-grammar:starred_expression_list` expression is evaluated +once; it should yield an :term:`iterable` object. An :term:`iterator` is +created for that iterable. The first item provided by the iterator is then +assigned to the target list using the standard rules for assignments +(see :ref:`assignment`), and the suite is executed. This repeats for each +item provided by the iterator. 
When the iterator is exhausted, the suite in the :keyword:`!else` clause, if present, is executed, and the loop terminates. @@ -1885,7 +1885,7 @@ expressions. The presence of annotations does not change the runtime semantics o the code, except if some mechanism is used that introspects and uses the annotations (such as :mod:`dataclasses` or :func:`functools.singledispatch`). -By default, annotations are lazily evaluated in a :ref:`annotation scope `. +By default, annotations are lazily evaluated in an :ref:`annotation scope `. This means that they are not evaluated when the code containing the annotation is evaluated. Instead, the interpreter saves information that can be used to evaluate the annotation later if requested. The :mod:`annotationlib` module provides tools for evaluating annotations. @@ -1898,6 +1898,12 @@ all annotations are instead stored as strings:: >>> f.__annotations__ {'param': 'annotation'} +This future statement will be deprecated and removed in a future version of Python, +but not before Python 3.13 reaches its end of life (see :pep:`749`). +When it is used, introspection tools like +:func:`annotationlib.get_annotations` and :func:`typing.get_type_hints` are +less likely to be able to resolve annotations at runtime. + .. rubric:: Footnotes diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 66b836eaf00..005a768f684 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1228,15 +1228,9 @@ Special attributes :attr:`__annotations__ attributes `. For best practices on working with :attr:`~object.__annotations__`, - please see :mod:`annotationlib`. - - .. caution:: - - Accessing the :attr:`!__annotations__` attribute of a class - object directly may yield incorrect results in the presence of - metaclasses. In addition, the attribute may not exist for - some classes. Use :func:`annotationlib.get_annotations` to - retrieve class annotations safely. + please see :mod:`annotationlib`. 
Where possible, use + :func:`annotationlib.get_annotations` instead of accessing this + attribute directly. .. versionchanged:: 3.14 Annotations are now :ref:`lazily evaluated `. @@ -1247,13 +1241,6 @@ Special attributes if the class has no annotations. See also: :attr:`__annotate__ attributes `. - .. caution:: - - Accessing the :attr:`!__annotate__` attribute of a class - object directly may yield incorrect results in the presence of - metaclasses. Use :func:`annotationlib.get_annotate_function` to - retrieve the annotate function safely. - .. versionadded:: 3.14 * - .. attribute:: type.__type_params__ @@ -1526,18 +1513,17 @@ positional arguments; bit ``0x08`` is set if the function uses the if the function is a generator. See :ref:`inspect-module-co-flags` for details on the semantics of each flags that might be present. -Future feature declarations (``from __future__ import division``) also use bits +Future feature declarations (for example, ``from __future__ import division``) also use bits in :attr:`~codeobject.co_flags` to indicate whether a code object was compiled with a -particular feature enabled: bit ``0x2000`` is set if the function was compiled -with future division enabled; bits ``0x10`` and ``0x1000`` were used in earlier -versions of Python. +particular feature enabled. See :attr:`~__future__._Feature.compiler_flag`. Other bits in :attr:`~codeobject.co_flags` are reserved for internal use. .. index:: single: documentation string If a code object represents a function and has a docstring, -the first item in :attr:`~codeobject.co_consts` is +the :data:`~inspect.CO_HAS_DOCSTRING` bit is set in :attr:`~codeobject.co_flags` +and the first item in :attr:`~codeobject.co_consts` is the docstring of the function. Methods on code objects @@ -3319,7 +3305,7 @@ left undefined. :meth:`__divmod__` method should be the equivalent to using :meth:`__floordiv__` and :meth:`__mod__`; it should not be related to :meth:`__truediv__`. 
Note that :meth:`__pow__` should be defined to accept - an optional third argument if the ternary version of the built-in :func:`pow` + an optional third argument if the three-argument version of the built-in :func:`pow` function is to be supported. If one of those methods does not support the operation with the supplied @@ -3356,10 +3342,15 @@ left undefined. is called if ``type(x).__sub__(x, y)`` returns :data:`NotImplemented` or ``type(y)`` is a subclass of ``type(x)``. [#]_ - .. index:: pair: built-in function; pow + Note that :meth:`__rpow__` should be defined to accept an optional third + argument if the three-argument version of the built-in :func:`pow` function + is to be supported. - Note that ternary :func:`pow` will not try calling :meth:`__rpow__` (the - coercion rules would become too complicated). + .. versionchanged:: 3.14 + + Three-argument :func:`pow` now tries calling :meth:`~object.__rpow__` if necessary. + Previously it was only called in two-argument :func:`!pow` and the binary + power operator. .. note:: diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 8837344e5dd..2a550b504ca 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1928,7 +1928,7 @@ Expression lists single: , (comma); expression list .. productionlist:: python-grammar - starred_expression: ["*"] `or_expr` + starred_expression: "*" `or_expr` | `expression` flexible_expression: `assignment_expression` | `starred_expression` flexible_expression_list: `flexible_expression` ("," `flexible_expression`)* [","] starred_expression_list: `starred_expression` ("," `starred_expression`)* [","] diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ff801a7d4fc..6c4a4ea81af 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -35,11 +35,11 @@ Logical lines .. 
index:: logical line, physical line, line joining, NEWLINE token -The end of a logical line is represented by the token NEWLINE. Statements -cannot cross logical line boundaries except where NEWLINE is allowed by the -syntax (e.g., between statements in compound statements). A logical line is -constructed from one or more *physical lines* by following the explicit or -implicit *line joining* rules. +The end of a logical line is represented by the token :data:`~token.NEWLINE`. +Statements cannot cross logical line boundaries except where :data:`!NEWLINE` +is allowed by the syntax (e.g., between statements in compound statements). +A logical line is constructed from one or more *physical lines* by following +the explicit or implicit *line joining* rules. .. _physical-lines: @@ -99,7 +99,7 @@ which is recognized by Bram Moolenaar's VIM. If no encoding declaration is found, the default encoding is UTF-8. If the implicit or explicit encoding of a file is UTF-8, an initial UTF-8 byte-order -mark (b'\xef\xbb\xbf') is ignored rather than being a syntax error. +mark (``b'\xef\xbb\xbf'``) is ignored rather than being a syntax error. If an encoding is declared, the encoding name must be recognized by Python (see :ref:`standard-encodings`). The @@ -160,11 +160,12 @@ Blank lines .. index:: single: blank line A logical line that contains only spaces, tabs, formfeeds and possibly a -comment, is ignored (i.e., no NEWLINE token is generated). During interactive -input of statements, handling of a blank line may differ depending on the -implementation of the read-eval-print loop. In the standard interactive -interpreter, an entirely blank logical line (i.e. one containing not even -whitespace or a comment) terminates a multi-line statement. +comment, is ignored (i.e., no :data:`~token.NEWLINE` token is generated). +During interactive input of statements, handling of a blank line may differ +depending on the implementation of the read-eval-print loop. 
+In the standard interactive interpreter, an entirely blank logical line (that +is, one containing not even whitespace or a comment) terminates a multi-line +statement. .. _indentation: @@ -202,19 +203,20 @@ the space count to zero). .. index:: INDENT token, DEDENT token -The indentation levels of consecutive lines are used to generate INDENT and -DEDENT tokens, using a stack, as follows. +The indentation levels of consecutive lines are used to generate +:data:`~token.INDENT` and :data:`~token.DEDENT` tokens, using a stack, +as follows. Before the first line of the file is read, a single zero is pushed on the stack; this will never be popped off again. The numbers pushed on the stack will always be strictly increasing from bottom to top. At the beginning of each logical line, the line's indentation level is compared to the top of the stack. If it is equal, nothing happens. If it is larger, it is pushed on the stack, and -one INDENT token is generated. If it is smaller, it *must* be one of the +one :data:`!INDENT` token is generated. If it is smaller, it *must* be one of the numbers occurring on the stack; all numbers on the stack that are larger are -popped off, and for each number popped off a DEDENT token is generated. At the -end of the file, a DEDENT token is generated for each number remaining on the -stack that is larger than zero. +popped off, and for each number popped off a :data:`!DEDENT` token is generated. +At the end of the file, a :data:`!DEDENT` token is generated for each number +remaining on the stack that is larger than zero. Here is an example of a correctly (though confusingly) indented piece of Python code:: @@ -254,8 +256,18 @@ Whitespace between tokens Except at the beginning of a logical line or in string literals, the whitespace characters space, tab and formfeed can be used interchangeably to separate tokens. 
Whitespace is needed between two tokens only if their concatenation -could otherwise be interpreted as a different token (e.g., ab is one token, but -a b is two tokens). +could otherwise be interpreted as a different token. For example, ``ab`` is one +token, but ``a b`` is two tokens. However, ``+a`` and ``+ a`` both produce +two tokens, ``+`` and ``a``, as ``+a`` is not a valid token. + + +.. _endmarker-token: + +End marker +---------- + +At the end of non-interactive input, the lexical analyzer generates an +:data:`~token.ENDMARKER` token. .. _other-tokens: @@ -263,67 +275,94 @@ a b is two tokens). Other tokens ============ -Besides NEWLINE, INDENT and DEDENT, the following categories of tokens exist: -*identifiers*, *keywords*, *literals*, *operators*, and *delimiters*. Whitespace -characters (other than line terminators, discussed earlier) are not tokens, but -serve to delimit tokens. Where ambiguity exists, a token comprises the longest -possible string that forms a legal token, when read from left to right. +Besides :data:`~token.NEWLINE`, :data:`~token.INDENT` and :data:`~token.DEDENT`, +the following categories of tokens exist: +*identifiers* and *keywords* (:data:`~token.NAME`), *literals* (such as +:data:`~token.NUMBER` and :data:`~token.STRING`), and other symbols +(*operators* and *delimiters*, :data:`~token.OP`). +Whitespace characters (other than logical line terminators, discussed earlier) +are not tokens, but serve to delimit tokens. +Where ambiguity exists, a token comprises the longest possible string that +forms a legal token, when read from left to right. .. _identifiers: -Identifiers and keywords -======================== +Names (identifiers and keywords) +================================ .. index:: identifier, name -Identifiers (also referred to as *names*) are described by the following lexical -definitions. +:data:`~token.NAME` tokens represent *identifiers*, *keywords*, and +*soft keywords*. 
-The syntax of identifiers in Python is based on the Unicode standard annex -UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for -further details. - -Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -include the uppercase and lowercase letters ``A`` through -``Z``, the underscore ``_`` and, except for the first character, the digits +Within the ASCII range (U+0001..U+007F), the valid characters for names +include the uppercase and lowercase letters (``A-Z`` and ``a-z``), +the underscore ``_`` and, except for the first character, the digits ``0`` through ``9``. -Python 3.0 introduced additional characters from outside the ASCII range (see -:pep:`3131`). For these characters, the classification uses the version of the -Unicode Character Database as included in the :mod:`unicodedata` module. -Identifiers are unlimited in length. Case is significant. +Names must contain at least one character, but have no upper length limit. +Case is significant. -.. productionlist:: python-grammar - identifier: `xid_start` `xid_continue`* - id_start: - id_continue: - xid_start: - xid_continue: +Besides ``A-Z``, ``a-z``, ``_`` and ``0-9``, names can also use "letter-like" +and "number-like" characters from outside the ASCII range, as detailed below. -The Unicode category codes mentioned above stand for: +All identifiers are converted into the `normalization form`_ NFKC while +parsing; comparison of identifiers is based on NFKC. 
-* *Lu* - uppercase letters -* *Ll* - lowercase letters -* *Lt* - titlecase letters -* *Lm* - modifier letters -* *Lo* - other letters -* *Nl* - letter numbers -* *Mn* - nonspacing marks -* *Mc* - spacing combining marks -* *Nd* - decimal numbers -* *Pc* - connector punctuations -* *Other_ID_Start* - explicit list of characters in `PropList.txt - <https://www.unicode.org/Public/16.0.0/ucd/PropList.txt>`_ to support backwards - compatibility -* *Other_ID_Continue* - likewise +Formally, the first character of a normalized identifier must belong to the +set ``id_start``, which is the union of: -All identifiers are converted into the normal form NFKC while parsing; comparison -of identifiers is based on NFKC. +* Unicode category ``<Lu>`` - uppercase letters (includes ``A`` to ``Z``) +* Unicode category ``<Ll>`` - lowercase letters (includes ``a`` to ``z``) +* Unicode category ``<Lt>`` - titlecase letters +* Unicode category ``<Lm>`` - modifier letters +* Unicode category ``<Lo>`` - other letters +* Unicode category ``<Nl>`` - letter numbers +* {``"_"``} - the underscore +* ``<Other_ID_Start>`` - an explicit set of characters in `PropList.txt`_ + to support backwards compatibility -A non-normative HTML file listing all valid identifier characters for Unicode -16.0.0 can be found at -https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +The remaining characters must belong to the set ``id_continue``, which is the +union of: + +* all characters in ``id_start`` +* Unicode category ``<Nd>`` - decimal numbers (includes ``0`` to ``9``) +* Unicode category ``<Pc>`` - connector punctuations +* Unicode category ``<Mn>`` - nonspacing marks +* Unicode category ``<Mc>`` - spacing combining marks +* ``<Other_ID_Continue>`` - another explicit set of characters in + `PropList.txt`_ to support backwards compatibility + +Unicode categories use the version of the Unicode Character Database as +included in the :mod:`unicodedata` module. + +These sets are based on the Unicode standard annex `UAX-31`_. +See also :pep:`3131` for further details. 
+ +Even more formally, names are described by the following lexical definitions: + +.. grammar-snippet:: + :group: python-grammar + + NAME: `xid_start` `xid_continue`* + id_start: <Lu> | <Ll> | <Lt> | <Lm> | <Lo> | <Nl> | "_" | <Other_ID_Start> + id_continue: `id_start` | <Nd> | <Pc> | <Mn> | <Mc> | <Other_ID_Continue> + xid_start: <all characters in `id_start` whose NFKC normalization is in (`id_start` `xid_continue`*)> + xid_continue: <all characters in `id_continue` whose NFKC normalization is in (`id_continue`*)> + identifier: <`NAME`, except keywords> + +A non-normative listing of all valid identifier characters as defined by +Unicode is available in the `DerivedCoreProperties.txt`_ file in the Unicode +Character Database. + + +.. _UAX-31: https://www.unicode.org/reports/tr31/ +.. _PropList.txt: https://www.unicode.org/Public/16.0.0/ucd/PropList.txt +.. _DerivedCoreProperties.txt: https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +.. _normalization form: https://www.unicode.org/reports/tr15/#Norm_Forms .. _keywords: @@ -335,7 +374,7 @@ Keywords single: keyword single: reserved word -The following identifiers are used as reserved words, or *keywords* of the +The following names are used as reserved words, or *keywords* of the language, and cannot be used as ordinary identifiers. They must be spelled exactly as written here: @@ -359,18 +398,19 @@ Soft Keywords .. versionadded:: 3.10 -Some identifiers are only reserved under specific contexts. These are known as -*soft keywords*. The identifiers ``match``, ``case``, ``type`` and ``_`` can -syntactically act as keywords in certain contexts, +Some names are only reserved under specific contexts. These are known as +*soft keywords*: + +- ``match``, ``case``, and ``_``, when used in the :keyword:`match` statement. +- ``type``, when used in the :keyword:`type` statement. + +These syntactically act as keywords in their specific contexts, but this distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. -``match``, ``case``, and ``_`` are used in the :keyword:`match` statement. 
-``type`` is used in the :keyword:`type` statement. - .. versionchanged:: 3.12 ``type`` is now a soft keyword. diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index f43dba19d13..e3bcb968128 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -14,7 +14,6 @@ Doc/c-api/typeobj.rst Doc/extending/extending.rst Doc/library/ast.rst Doc/library/asyncio-extending.rst -Doc/library/decimal.rst Doc/library/email.charset.rst Doc/library/email.compat32-message.rst Doc/library/email.parser.rst @@ -32,7 +31,6 @@ Doc/library/optparse.rst Doc/library/os.rst Doc/library/pickletools.rst Doc/library/platform.rst -Doc/library/plistlib.rst Doc/library/profile.rst Doc/library/pyexpat.rst Doc/library/resource.rst diff --git a/Doc/tools/templates/_docs_by_version.html b/Doc/tools/templates/_docs_by_version.html new file mode 100644 index 00000000000..1a84cfbf229 --- /dev/null +++ b/Doc/tools/templates/_docs_by_version.html @@ -0,0 +1,11 @@ +{# +This file is only used in indexsidebar.html, where it is included in the docs +by version list. For non-end-of-life branches, build_docs.py overwrites this +list with the full list of versions. + +Keep the following two files synchronised: +* cpython/Doc/tools/templates/_docs_by_version.html +* docsbuild-scripts/templates/_docs_by_version.html +#} +
  • {% trans %}Stable{% endtrans %}
  • +
  • {% trans %}In development{% endtrans %}
  • diff --git a/Doc/tools/templates/customsourcelink.html b/Doc/tools/templates/customsourcelink.html index eb9db9e341b..43d3a7a892a 100644 --- a/Doc/tools/templates/customsourcelink.html +++ b/Doc/tools/templates/customsourcelink.html @@ -1,11 +1,11 @@ {%- if show_source and has_source and sourcename %}
    -

    {{ _('This Page') }}

    +

    {{ _('This page') }}

    diff --git a/Doc/tools/templates/download.html b/Doc/tools/templates/download.html index 4645f7d394e..47a57eb111b 100644 --- a/Doc/tools/templates/download.html +++ b/Doc/tools/templates/download.html @@ -27,7 +27,7 @@ {%- endblock -%} {% block body %} -

    {% trans %}Download Python {{ dl_version }} Documentation{% endtrans %}

    +

    {% trans %}Download Python {{ dl_version }} documentation{% endtrans %}

    {% if last_updated %}

    {% trans %}Last updated on: {{ last_updated }}.{% endtrans %}

    {% endif %} diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index 06a4223643a..544cc4234f4 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -72,7 +72,7 @@

    {{ docstitle|e }}

    - + diff --git a/Doc/tools/templates/indexsidebar.html b/Doc/tools/templates/indexsidebar.html index 5986204256f..086f15662cf 100644 --- a/Doc/tools/templates/indexsidebar.html +++ b/Doc/tools/templates/indexsidebar.html @@ -2,17 +2,16 @@

    {% trans %}Download{% endtrans %}

    {% trans %}Download these documents{% endtrans %}

    {% trans %}Docs by version{% endtrans %}

    -

    {% trans %}Other resources{% endtrans %}

    diff --git a/Doc/tools/templates/layout.html b/Doc/tools/templates/layout.html index 69c2ab6ae05..1cb0200822d 100644 --- a/Doc/tools/templates/layout.html +++ b/Doc/tools/templates/layout.html @@ -26,12 +26,11 @@ {% endblock %} {% block extrahead %} - {% if builder == "html" and enable_analytics %} - - - {% endif %} - - {% if builder != "htmlhelp" %} + {% if builder == "html" %} + {% if enable_analytics %} + + {% endif %} + {% if pagename == 'whatsnew/changelog' and not embedded %} {% endif %} {% endif %} diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 95939242fb7..5c0e8f34bf8 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -999,7 +999,8 @@ scope:: 43 The above example uses a lambda expression to return a function. Another use -is to pass a small function as an argument:: +is to pass a small function as an argument. For instance, :meth:`list.sort` +takes a sorting key function *key* which can be a lambda function:: >>> pairs = [(1, 'one'), (2, 'two'), (3, 'three'), (4, 'four')] >>> pairs.sort(key=lambda pair: pair[1]) @@ -1055,7 +1056,7 @@ Here is an example of a multi-line docstring:: >>> print(my_function.__doc__) Do nothing, but document it. - No, really, it doesn't do anything. + No, really, it doesn't do anything. .. _tut-annotations: diff --git a/Doc/tutorial/index.rst b/Doc/tutorial/index.rst index 96791f88c86..d0bf77dc40d 100644 --- a/Doc/tutorial/index.rst +++ b/Doc/tutorial/index.rst @@ -4,6 +4,10 @@ The Python Tutorial ###################### +.. Tip:: This tutorial is designed for + *programmers* that are new to the Python language, + **not** *beginners* who are new to programming. + Python is an easy to learn, powerful programming language. It has efficient high-level data structures and a simple but effective approach to object-oriented programming. Python's elegant syntax and dynamic typing, @@ -21,7 +25,8 @@ implemented in C or C++ (or other languages callable from C). 
Python is also suitable as an extension language for customizable applications. This tutorial introduces the reader informally to the basic concepts and -features of the Python language and system. It helps to have a Python +features of the Python language and system. Be aware that it expects you to +have a basic understanding of programming in general. It helps to have a Python interpreter handy for hands-on experience, but all examples are self-contained, so the tutorial can be read off-line as well. diff --git a/Doc/tutorial/interpreter.rst b/Doc/tutorial/interpreter.rst index 02e7de77322..cd526071424 100644 --- a/Doc/tutorial/interpreter.rst +++ b/Doc/tutorial/interpreter.rst @@ -16,7 +16,7 @@ Unix shell's search path makes it possible to start it by typing the command: .. code-block:: text - python3.14 + python3.15 to the shell. [#]_ Since the choice of the directory where the interpreter lives is an installation option, other places are possible; check with your local @@ -97,8 +97,8 @@ before printing the first prompt: .. code-block:: shell-session - $ python3.14 - Python 3.14 (default, April 4 2024, 09:25:04) + $ python3.15 + Python 3.15 (default, May 7 2025, 15:46:04) [GCC 10.2.0] on linux Type "help", "copyright", "credits" or "license" for more information. >>> diff --git a/Doc/tutorial/introduction.rst b/Doc/tutorial/introduction.rst index bec5da8fd75..cdb35da7bc9 100644 --- a/Doc/tutorial/introduction.rst +++ b/Doc/tutorial/introduction.rst @@ -147,6 +147,8 @@ Python can manipulate text (represented by type :class:`str`, so-called "``Yay! :)``". They can be enclosed in single quotes (``'...'``) or double quotes (``"..."``) with the same result [#]_. +.. code-block:: pycon + >>> 'spam eggs' # single quotes 'spam eggs' >>> "Paris rabbit got your back :)! Yay!" 
# double quotes diff --git a/Doc/tutorial/stdlib.rst b/Doc/tutorial/stdlib.rst index 4b3eef313e7..d83ecca270b 100644 --- a/Doc/tutorial/stdlib.rst +++ b/Doc/tutorial/stdlib.rst @@ -15,7 +15,7 @@ operating system:: >>> import os >>> os.getcwd() # Return the current working directory - 'C:\\Python314' + 'C:\\Python315' >>> os.chdir('/server/accesslogs') # Change current working directory >>> os.system('mkdir today') # Run the command mkdir in the system shell 0 diff --git a/Doc/tutorial/stdlib2.rst b/Doc/tutorial/stdlib2.rst index a2f96b34b2d..678b71c9274 100644 --- a/Doc/tutorial/stdlib2.rst +++ b/Doc/tutorial/stdlib2.rst @@ -279,7 +279,7 @@ applications include caching objects that are expensive to create:: Traceback (most recent call last): File "", line 1, in d['primary'] # entry was automatically removed - File "C:/python314/lib/weakref.py", line 46, in __getitem__ + File "C:/python315/lib/weakref.py", line 46, in __getitem__ o = self.data[key]() KeyError: 'primary' diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 9b5c6eb863e..40a46a62031 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -73,6 +73,9 @@ source. .. audit-event:: cpython.run_command command cmdoption-c + .. versionchanged:: 3.14 + *command* is automatically dedented before execution. + .. option:: -m Search :data:`sys.path` for the named module and execute its contents as @@ -536,11 +539,21 @@ Miscellaneous options * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded - application. Typical usage is ``python3 -X importtime -c 'import - asyncio'``. See also :envvar:`PYTHONPROFILEIMPORTTIME`. + application. Typical usage is ``python -X importtime -c 'import asyncio'``. + + ``-X importtime=2`` enables additional output that indicates when an + imported module has already been loaded. 
In such cases, the string + ``cached`` will be printed in both time columns. + + See also :envvar:`PYTHONPROFILEIMPORTTIME`. .. versionadded:: 3.7 + .. versionchanged:: 3.14 + + Added ``-X importtime=2`` to also trace imports of loaded modules, + and reserved values other than ``1`` and ``2`` for future use. + * ``-X dev``: enable :ref:`Python Development Mode `, introducing additional runtime checks that are too expensive to be enabled by default. See also :envvar:`PYTHONDEVMODE`. @@ -667,6 +680,13 @@ Miscellaneous options .. versionchanged:: 3.10 Removed the ``-X oldparser`` option. +.. versionremoved:: 3.14 + + :option:`!-J` is no longer reserved for use by Jython_, + and now has no special meaning. + + .. _Jython: https://www.jython.org/ + .. _using-on-controlling-color: Controlling color @@ -691,15 +711,6 @@ output. To control the color output only in the Python interpreter, the precedence over ``NO_COLOR``, which in turn takes precedence over ``FORCE_COLOR``. -Options you shouldn't use -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. option:: -J - - Reserved for use by Jython_. - -.. _Jython: https://www.jython.org/ - .. _using-on-envvars: @@ -981,12 +992,17 @@ conflict. .. envvar:: PYTHONPROFILEIMPORTTIME - If this environment variable is set to a non-empty string, Python will - show how long each import takes. + If this environment variable is set to ``1``, Python will show + how long each import takes. If set to ``2``, Python will include output for + imported modules that have already been loaded. This is equivalent to setting the :option:`-X` ``importtime`` option. .. versionadded:: 3.7 + .. versionchanged:: 3.14 + + Added ``PYTHONPROFILEIMPORTTIME=2`` to also trace imports of loaded modules. + .. envvar:: PYTHONASYNCIODEBUG @@ -1278,6 +1294,14 @@ conflict. .. versionadded:: 3.14 +.. 
envvar:: PYTHON_JIT + + On builds where experimental just-in-time compilation is available, this + variable can force the JIT to be disabled (``0``) or enabled (``1``) at + interpreter startup. + + .. versionadded:: 3.13 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 899920acf56..b914d3397b6 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -22,7 +22,7 @@ Features and minimum versions required to build CPython: * Support for threads. -* OpenSSL 1.1.1 is the minimum version and OpenSSL 3.0.9 is the recommended +* OpenSSL 1.1.1 is the minimum version and OpenSSL 3.0.16 is the recommended minimum version for the :mod:`ssl` and :mod:`hashlib` extension modules. * SQLite 3.15.2 for the :mod:`sqlite3` extension module. @@ -302,14 +302,21 @@ General Options .. option:: --enable-experimental-jit=[no|yes|yes-off|interpreter] - Indicate how to integrate the :ref:`JIT compiler `. + Indicate how to integrate the :ref:`experimental just-in-time compiler `. - * ``no`` - build the interpreter without the JIT. - * ``yes`` - build the interpreter with the JIT. - * ``yes-off`` - build the interpreter with the JIT but disable it by default. - * ``interpreter`` - build the interpreter without the JIT, but with the tier 2 enabled interpreter. + * ``no``: Don't build the JIT. + * ``yes``: Enable the JIT. To disable it at runtime, set the environment + variable :envvar:`PYTHON_JIT=0 `. + * ``yes-off``: Build the JIT, but disable it by default. To enable it at + runtime, set the environment variable :envvar:`PYTHON_JIT=1 `. + * ``interpreter``: Enable the "JIT interpreter" (only useful for those + debugging the JIT itself). To disable it at runtime, set the environment + variable :envvar:`PYTHON_JIT=0 `. - By convention, ``--enable-experimental-jit`` is a shorthand for ``--enable-experimental-jit=yes``. 
+ ``--enable-experimental-jit=no`` is the default behavior if the option is not + provided, and ``--enable-experimental-jit`` is shorthand for + ``--enable-experimental-jit=yes``. See :file:`Tools/jit/README.md` for more + information, including how to install the necessary build-time dependencies. .. note:: diff --git a/Doc/using/windows.rst b/Doc/using/windows.rst index 1a6322d7234..9628da3d2f6 100644 --- a/Doc/using/windows.rst +++ b/Doc/using/windows.rst @@ -1,25 +1,977 @@ .. highlight:: none +.. _python.org/downloads: https://www.python.org/downloads/ + +.. _Microsoft Store app: https://apps.microsoft.com/detail/9NQ7512CXL7T + .. _using-on-windows: ************************* Using Python on Windows ************************* -.. sectionauthor:: Robert Lehmann -.. sectionauthor:: Steve Dower +.. sectionauthor:: Steve Dower This document aims to give an overview of Windows-specific behaviour you should know about when using Python on Microsoft Windows. Unlike most Unix systems and services, Windows does not include a system -supported installation of Python. To make Python available, the CPython team -has compiled Windows installers with every `release -`_ for many years. These installers -are primarily intended to add a per-user installation of Python, with the -core interpreter and library being used by a single user. The installer is also -able to install for all users of a single machine, and a separate ZIP file is -available for application-local distributions. +supported installation of Python. Instead, Python can be obtained from a number +of distributors, including directly from the CPython team. Each Python +distribution will have its own benefits and drawbacks, however, consistency with +other tools you are using is generally a worthwhile benefit. Before committing +to the process described here, we recommend investigating your existing tools to +see if they can provide Python directly. 
+ +To obtain Python from the CPython team, use the Python Install Manager. This +is a standalone tool that makes Python available as global commands on your +Windows machine, integrates with the system, and supports updates over time. You +can download the Python Install Manager from `python.org/downloads`_ or through +the `Microsoft Store app`_. + +Once you have installed the Python Install Manager, the global ``python`` +command can be used from any terminal to launch your current latest version of +Python. This version may change over time as you add or remove different +versions, and the ``py list`` command will show which is current. + +In general, we recommend that you create a :ref:`virtual environment ` +for each project and run ``\Scripts\Activate`` in your terminal to use it. +This provides isolation between projects, consistency over time, and ensures +that additional commands added by packages are also available in your session. +Create a virtual environment using ``python -m venv ``. + +If the ``python`` or ``py`` commands do not seem to be working, please see the +:ref:`Troubleshooting ` section below. There are +sometimes additional manual steps required to configure your PC. + +Apart from using the Python install manager, Python can also be obtained as +NuGet packages. See :ref:`windows-nuget` below for more information on these +packages. + +The embeddable distros are minimal packages of Python suitable for embedding +into larger applications. They can be installed using the Python install +manager. See :ref:`windows-embeddable` below for more information on these +packages. + + +.. _pymanager: +.. _windows-store: +.. _setting-envvars: +.. _windows-path-mod: +.. _launcher: + +Python Install Manager +====================== + +Installation +------------ + +The Python install manager can be installed from the `Microsoft Store app`_ +or downloaded and installed from `python.org/downloads`_. The two versions are +identical. 
+ +To install through the Store, simply click "Install". After it has completed, +open a terminal and type ``python`` to get started. + +To install the file downloaded from python.org, either double-click and select +"Install", or run ``Add-AppxPackage `` in Windows PowerShell. + +After installation, the ``python``, ``py``, and ``pymanager`` commands should be +available. If you have existing installations of Python, or you have modified +your :envvar:`PATH` variable, you may need to remove them or undo the +modifications. See :ref:`pymanager-troubleshoot` for more help with fixing +non-working commands. + +When you first install a runtime, you will likely be prompted to add a directory +to your :envvar:`PATH`. This is optional, if you prefer to use the ``py`` +command, but is offered for those who prefer the full range of aliases (such +as ``python3.14.exe``) to be available. The directory will be +:file:`%LocalAppData%\\Python\\bin` by default, but may be customized by an +administrator. Click Start and search for "Edit environment variables for your +account" for the system settings page to add the path. + +Each Python runtime you install will have its own directory for scripts. These +also need to be added to :envvar:`PATH` if you want to use them. + +The Python install manager will be automatically updated to new releases. This +does not affect any installs of Python runtimes. Uninstalling the Python install +manager does not uninstall any Python runtimes. + +If you are not able to install an MSIX in your context, for example, you are +using automated deployment software that does not support it, or are targeting +Windows Server 2019, please see :ref:`pymanager-advancedinstall` below for more +information.
+ + +Basic Use +--------- + +The recommended command for launching Python is ``python``, which will either +launch the version requested by the script being launched, an active virtual +environment, or the default installed version, which will be the latest stable +release unless configured otherwise. If no version is specifically requested and +no runtimes are installed at all, the current latest release will be installed +automatically. + +For all scenarios involving multiple runtime versions, the recommended command +is ``py``. This may be used anywhere in place of ``python`` or the older +``py.exe`` launcher. By default, ``py`` matches the behaviour of ``python``, but +also allows command line options to select a specific version as well as +subcommands to manage installations. These are detailed below. + +Because the ``py`` command may already be taken by the previous version, there +is also an unambiguous ``pymanager`` command. Scripted installs that are +intending to use Python install manager should consider using ``pymanager``, due +to the lower chance of encountering a conflict with existing installs. The only +difference between the two commands is when running without any arguments: +``py`` will install and launch your default interpreter, while ``pymanager`` +will display help (``pymanager exec ...`` provides equivalent behaviour to +``py ...``). + +Each of these commands also has a windowed version that avoids creating a +console window. These are ``pyw``, ``pythonw`` and ``pymanagerw``. A ``python3`` +command is also included that mimics the ``python`` command. It is intended to +catch accidental uses of the typical POSIX command on Windows, but is not meant +to be widely used or recommended. + +To launch your default runtime, run ``python`` or ``py`` with the arguments you +want to be passed to the runtime (such as script files or the module to launch): + +.. code:: + + $> py + ... + $> python my-script.py + ... + $> py -m this + ... 
+ +The default runtime can be overridden with the :envvar:`PYTHON_MANAGER_DEFAULT` +environment variable, or a configuration file. See :ref:`pymanager-config` for +information about configuration settings. + +To launch a specific runtime, the ``py`` command accepts a ``-V:`` option. +This option must be specified before any others. The tag is part or all of the +identifier for the runtime; for those from the CPython team, it looks like the +version, potentially with the platform. For compatibility, the ``V:`` may be +omitted in cases where the tag refers to an official release and starts with +``3``. + +.. code:: + + $> py -V:3.14 ... + $> py -V:3-arm64 ... + +Runtimes from other distributors may require the *company* to be included as +well. This should be separated from the tag by a slash, and may be a prefix. +Specifying the company is optional when it is ``PythonCore``, and specifying the +tag is optional (but not the slash) when you want the latest release from a +specific company. + +.. code:: + + $> py -V:Distributor\1.0 ... + $> py -V:distrib/ ... + +If no version is specified, but a script file is passed, the script will be +inspected for a *shebang line*. This is a special format for the first line in +a file that allows overriding the command. See :ref:`pymanager-shebang` for more +information. When there is no shebang line, or it cannot be resolved, the script +will be launched with the default runtime. + +If you are running in an active virtual environment, have not requested a +particular version, and there is no shebang line, the default runtime will be +that virtual environment. In this scenario, the ``python`` command was likely +already overridden and none of these checks occurred. However, this behaviour +ensures that the ``py`` command can be used interchangeably. + +When you launch either ``python`` or ``py`` but do not have any runtimes +installed, and the requested version is the default, it will be installed +automatically and then launched. 
Otherwise, the requested version will be +installed if automatic installation is configured (most likely by setting +``PYTHON_MANAGER_AUTOMATIC_INSTALL`` to ``true``), or if the ``py exec`` or +``pymanager exec`` forms of the command were used. + + +Command Help +------------ + +The ``py help`` command will display the full list of supported commands, along +with their options. Any command may be passed the ``-?`` option to display its +help, or its name passed to ``py help``. + +.. code:: + + $> py help + $> py help install + $> py install /? + + +All commands support some common options, which will be shown by ``py help``. +These options must be specified after any subcommand. Specifying ``-v`` or +``--verbose`` will increase the amount of output shown, and ``-vv`` will +increase it further for debugging purposes. Passing ``-q`` or ``--quiet`` will +reduce output, and ``-qq`` will reduce it further. + +The ``--config=`` option allows specifying a configuration file to +override multiple settings at once. See :ref:`pymanager-config` below for more +information about these files. + + +Listing Runtimes +---------------- + +.. code:: + + $> py list [-f=|--format=] [-1|--one] [--online|-s=|--source=] [...] + +The list of installed runtimes can be seen using ``py list``. A filter may be +added in the form of one or more tags (with or without company specifier), and +each may include a ``<``, ``<=``, ``>=`` or ``>`` prefix to restrict to a range. + +A range of formats are supported, and can be passed as the ``--format=`` or +``-f `` option. Formats include ``table`` (a user friendly table view), +``csv`` (comma-separated table), ``json`` (a single JSON blob), ``jsonl`` (one +JSON blob per result), ``exe`` (just the executable path), ``prefix`` (just the +prefix path). + +The ``--one`` or ``-1`` option only displays a single result. If the default +runtime is included, it will be the one. 
Otherwise, the "best" result is shown +("best" is deliberately vaguely defined, but will usually be the most recent +version). The result shown by ``py list --one `` will match the runtime +that would be launched by ``py -V:``. + +The ``--only-managed`` option excludes results that were not installed by the +Python install manager. This is useful when determining which runtimes may be +updated or uninstalled through the ``py`` command. + +The ``--online`` option is short for passing ``--source=`` with the default +source. Passing either of these options will search the online index for +runtimes that can be installed. The result shown by ``py list --online --one +`` will match the runtime that would be installed by ``py install ``. + +.. code:: + + $> py list --online 3.14 + +For compatibility with the old launcher, the ``--list``, ``--list-paths``, +``-0`` and ``-0p`` commands (e.g. ``py -0p``) are retained. They do not allow +additional options, and will produce legacy formatted output. + + +Installing Runtimes +------------------- + +.. code:: + + $> py install [-s=|--source=] [-f|--force] [-u|--update] [--dry-run] [...] + +New runtime versions may be added using ``py install``. One or more tags may be +specified, and the special tag ``default`` may be used to select the default. +Ranges are not supported for installation. + +The ``--source=`` option allows overriding the online index that is used to +obtain runtimes. This may be used with an offline index, as shown in +:ref:`pymanager-offline`. + +Passing ``--force`` will ignore any cached files and remove any existing install +to replace it with the specified one. + +Passing ``--update`` will replace existing installs if the new version is newer. +Otherwise, they will be left. If no tags are provided with ``--update``, all +installs managed by the Python install manager will be updated if newer versions +are available. 
Updates will remove any modifications made to the install, +including globally installed packages, but virtual environments will continue to +work. + +Passing ``--dry-run`` will generate output and logs, but will not modify any +installs. + +In addition to the above options, the ``--target`` option will extract the +runtime to the specified directory instead of doing a normal install. This is +useful for embedding runtimes into larger applications. + +.. code:: + + $> py install ... [-t=|--target=] + + +.. _pymanager-offline: + +Offline Installs +---------------- + +To perform offline installs of Python, you will need to first create an offline +index on a machine that has network access. + +.. code:: + + $> py install --download= ... ... + +The ``--download=`` option will download the packages for the listed tags +and create a directory containing them and an ``index.json`` file suitable for +later installation. This entire directory can be moved to the offline machine +and used to install one or more of the bundled runtimes: + +.. code:: + + $> py install --source="\index.json" ... + +The Python install manager can be installed by downloading its installer and +moving it to another machine before installing. + +Alternatively, the ZIP files in an offline index directory can simply be +transferred to another machine and extracted. This will not register the install +in any way, and so it must be launched by directly referencing the executables +in the extracted directory, but it is sometimes a preferable approach in cases +where installing the Python install manager is not possible or convenient. + +In this way, Python runtimes can be installed and managed on a machine without +access to the internet. + + +Uninstalling Runtimes +--------------------- + +.. code:: + + $> py uninstall [-y|--yes] ... + +Runtimes may be removed using the ``py uninstall`` command. One or more tags +must be specified. Ranges are not supported here. 
+ +The ``--yes`` option bypasses the confirmation prompt before uninstalling. + +Instead of passing tags individually, the ``--purge`` option may be specified. +This will remove all runtimes managed by the Python install manager, including +cleaning up the Start menu, registry, and any download caches. Runtimes that +were not installed by the Python install manager will not be impacted, and +neither will manually created configuration files. + +.. code:: + + $> py uninstall [-y|--yes] --purge + +The Python install manager can be uninstalled through the Windows "Installed +apps" settings page. This does not remove any runtimes, and they will still be +usable, though the global ``python`` and ``py`` commands will be removed. +Reinstalling the Python install manager will allow you to manage these runtimes +again. To completely clean up all Python runtimes, run with ``--purge`` before +uninstalling the Python install manager. + +.. _pymanager-config: + +Configuration +------------- + +Python install manager is configured with a hierarchy of configuration files, +environment variables, command-line options, and registry settings. In general, +configuration files have the ability to configure everything, including the +location of other configuration files, while registry settings are +administrator-only and will override configuration files. Command-line options +override all other settings, but not every option is available. + +This section will describe the defaults, but be aware that modified or +overridden installs may resolve settings differently. + +A global configuration file may be configured by an administrator, and would be +read first. The user configuration file is stored at +:file:`%AppData%\\Python\\pymanager.json` (by default) and is read next, +overwriting any settings from earlier files. An additional configuration file +may be specified as the ``PYTHON_MANAGER_CONFIG`` environment variable or the +``--config`` command line option (but not both). 
+ +The following settings are those that are considered likely to be modified in +normal use. Later sections list those that are intended for administrative +customization. + +.. csv-table:: Standard configuration options + :header: "Config Key", "Environment Variable", "Description" + :widths: 2, 2, 4 + + ``default_tag``,``PYTHON_MANAGER_DEFAULT``,"The preferred default + version to launch or install. By default, this is interpreted as the most + recent non-prerelease version from the CPython team. + " + ``default_platform``,``PYTHON_MANAGER_DEFAULT_PLATFORM``,"The preferred + default platform to launch or install. This is treated as a suffix to the + specified tag, such that ``py -V:3.14`` would prefer an install for + ``3.14-64`` if it exists (and ``default_platform`` is ``-64``), but will use + ``3.14`` if no tagged install exists. + " + ``logs_dir``,``PYTHON_MANAGER_LOGS``,"The location where log files are + written. By default, :file:`%TEMP%`. + " + ``automatic_install``,``PYTHON_MANAGER_AUTOMATIC_INSTALL``,"True to + allow automatic installs when specifying a particular runtime to launch. + By default, true. + " + ``include_unmanaged``,``PYTHON_MANAGER_INCLUDE_UNMANAGED``,"True to + allow listing and launching runtimes that were not installed by the Python + install manager, or false to exclude them. By default, true. + " + ``shebang_can_run_anything``,"``PYTHON_MANAGER_SHEBANG_CAN_RUN_ANYTHING`` + ","True to allow shebangs in ``.py`` files to launch applications other than + Python runtimes, or false to prevent it. By default, true. + " + ``log_level``,"``PYMANAGER_VERBOSE``, ``PYMANAGER_DEBUG``","Set + the default level of output (0-50). By default, 20. Lower values produce more + output. The environment variables are boolean, and may produce additional + output during startup that is later suppressed by other configuration.
+ " + ``confirm``,``PYTHON_MANAGER_CONFIRM``,"True to confirm certain actions + before taking them (such as uninstall), or false to skip the confirmation. By + default, true. + " + ``install.source``,``PYTHON_MANAGER_SOURCE_URL``,"Override the index + feed to obtain new installs from. + " + ``list.format``,``PYTHON_MANAGER_LIST_FORMAT``,"Specify the default + format used by the ``py list`` command. By default, ``table``. + " + +Dotted names should be nested inside JSON objects, for example, ``list.format`` +would be specified as ``{"list": {"format": "table"}}``. + +.. _pymanager-shebang: + +Shebang lines +------------- + +If the first line of a script file starts with ``#!``, it is known as a +"shebang" line. Linux and other Unix like operating systems have native +support for such lines and they are commonly used on such systems to indicate +how a script should be executed. The ``python`` and ``py`` commands allow the +same facilities to be used with Python scripts on Windows. + +To allow shebang lines in Python scripts to be portable between Unix and +Windows, a number of 'virtual' commands are supported to specify which +interpreter to use. The supported virtual commands are: + +* ``/usr/bin/env `` +* ``/usr/bin/env -S `` +* ``/usr/bin/`` +* ``/usr/local/bin/`` +* ```` + +For example, if the first line of your script starts with + +.. code-block:: sh + + #! /usr/bin/python + +The default Python or an active virtual environment will be located and used. +As many Python scripts written to work on Unix will already have this line, +you should find these scripts can be used by the launcher without modification. +If you are writing a new script on Windows which you hope will be useful on +Unix, you should use one of the shebang lines starting with ``/usr``. + +Any of the above virtual commands can have ```` replaced by an alias from +an installed runtime. 
That is, any command generated in the global aliases +directory (which you may have added to your :envvar:`PATH` environment variable) +can be used in a shebang, even if it is not on your :envvar:`PATH`. This allows +the use of shebangs like ``/usr/bin/python3.12`` to select a particular runtime. + +If no runtimes are installed, or if automatic installation is enabled, the +requested runtime will be installed if necessary. See :ref:`pymanager-config` +for information about configuration settings. + +The ``/usr/bin/env`` form of shebang line will also search the :envvar:`PATH` +environment variable for unrecognized commands. This corresponds to the +behaviour of the Unix ``env`` program, which performs the same search, but +prefers launching known Python commands. A warning may be displayed when +searching for arbitrary executables, and this search may be disabled by the +``shebang_can_run_anything`` configuration option. + +Shebang lines that do not match any of the above patterns are treated as *Windows* +executable paths that are absolute or relative to the directory containing the +script file. This is a convenience for Windows-only scripts, such as those +generated by an installer, since the behavior is not compatible with Unix-style +shells. These paths may be quoted, and may include multiple arguments, after +which the path to the script and any additional arguments will be appended. +This functionality may be disabled by the ``shebang_can_run_anything`` +configuration option. + +.. note:: + + The behaviour of shebangs in the Python install manager is subtly different + from the previous ``py.exe`` launcher, and the old configuration options no + longer apply. If you are specifically reliant on the old behaviour or + configuration, we recommend keeping the legacy launcher. It may be + `downloaded independently `_ + and installed on its own.
The legacy launcher's ``py`` command will override + PyManager's one, and you will need to use ``pymanager`` commands for + installing and uninstalling. + + +.. _Add-AppxPackage: https://learn.microsoft.com/powershell/module/appx/add-appxpackage + +.. _Remove-AppxPackage: https://learn.microsoft.com/powershell/module/appx/remove-appxpackage + +.. _Add-AppxProvisionedPackage: https://learn.microsoft.com/powershell/module/dism/add-appxprovisionedpackage + +.. _PackageManager: https://learn.microsoft.com/uwp/api/windows.management.deployment.packagemanager + +.. _pymanager-advancedinstall: + +Advanced Installation +--------------------- + +For situations where an MSIX cannot be installed, such as some older +administrative distribution platforms, there is an MSI available from the +python.org downloads page. This MSI has no user interface, and can only perform +per-machine installs to its default location in Program Files. It will attempt +to modify the system :envvar:`PATH` environment variable to include this install +location, but be sure to validate this on your configuration. + +.. note:: + + Windows Server 2019 is the only version of Windows that CPython supports that + does not support MSIX. For Windows Server 2019, you should use the MSI. + +Be aware that the MSI package does not bundle any runtimes, and so is not +suitable for installs into offline environments without also creating an offline +install index. See :ref:`pymanager-offline` and :ref:`pymanager-admin-config` +for information on handling these scenarios. + +Runtimes installed by the MSI are shared with those installed by the MSIX, and +are all per-user only. The Python install manager does not support installing +runtimes per-machine. To emulate a per-machine install, you can use ``py install +--target=`` as administrator and add your own system-wide +modifications to :envvar:`PATH`, the registry, or the Start menu. 
+ +When the MSIX is installed, but commands are not available in the :envvar:`PATH` +environment variable, they can be found under +:file:`%LocalAppData%\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.PythonManager_3847v3x7pw1km` +or +:file:`%LocalAppData%\\Microsoft\\WindowsApps\\PythonSoftwareFoundation.PythonManager_qbz5n2kfra8p0`, +depending on whether it was installed from python.org or through the Windows +Store. Attempting to run the executable directly from Program Files is not +recommended. + +To programmatically install the Python install manager, it is easiest to use +WinGet, which is included with all supported versions of Windows: + +.. code-block:: powershell + + $> winget install 9NQ7512CXL7T -e --accept-package-agreements --disable-interactivity + + # Optionally run the configuration checker and accept all changes + $> py install --configure -y + +To download the Python install manager and install on another machine, the +following WinGet command will download the required files from the Store to your +Downloads directory (add ``-d `` to customize the output location). +This also generates a YAML file that appears to be unnecessary, as the +downloaded MSIX can be installed by launching or using the commands below. + +.. code-block:: powershell + + $> winget download 9NQ7512CXL7T -e --skip-license --accept-package-agreements --accept-source-agreements + +To programmatically install or uninstall an MSIX using only PowerShell, the +`Add-AppxPackage`_ and `Remove-AppxPackage`_ PowerShell cmdlets are recommended: + +.. code-block:: powershell + + $> Add-AppxPackage C:\Downloads\python-manager-25.0.msix + ... + $> Get-AppxPackage PythonSoftwareFoundation.PythonManager | Remove-AppxPackage + +The latest release can be downloaded and installed by Windows by passing the +AppInstaller file to the Add-AppxPackage command. 
This installs using the MSIX +on python.org, and is only recommended for cases where installing via the Store +(interactively or using WinGet) is not possible. + +.. code-block:: powershell + + $> Add-AppxPackage -AppInstallerFile https://www.python.org/ftp/python/pymanager/pymanager.appinstaller + +Other tools and APIs may also be used to provision an MSIX package for all users +on a machine, but Python does not consider this a supported scenario. We suggest +looking into the PowerShell `Add-AppxProvisionedPackage`_ cmdlet, the native +Windows `PackageManager`_ class, or the documentation and support for your +deployment tool. + +Regardless of the install method, users will still need to install their own +copies of Python itself, as there is no way to trigger those installs without +being a logged-in user. When using the MSIX, the latest version of Python will +be available for all users to install without network access. + +Note that the MSIX packages downloadable from the Store and from the Python website are +subtly different and cannot be installed at the same time. Wherever possible, +we suggest using the above WinGet commands to download the package from the +Store to reduce the risk of setting up conflicting installs. There are no +licensing restrictions on the Python install manager that would prevent using +the Store package in this way. + + +.. _pymanager-admin-config: + +Administrative Configuration +---------------------------- + +There are a number of options that may be useful for administrators to override +configuration of the Python install manager. These can be used to provide local +caching, disable certain shortcut types, or override bundled content. All of the +above configuration options may be set, as well as those below.
+ +Configuration options may be overridden in the registry by setting values under +:file:`HKEY_LOCAL_MACHINE\\Software\\Policies\\Python\\PyManager`, where the +value name matches the configuration key and the value type is ``REG_SZ``. Note +that this key can itself be customized, but only by modifying the core config +file distributed with the Python install manager. We recommend, however, that +registry values are used only to set ``base_config`` to a JSON file containing +the full set of overrides. Registry key overrides will replace any other +configured setting, while ``base_config`` allows users to further modify +settings they may need. + +Note that most settings with environment variables support those variables +because their default setting specifies the variable. If you override them, the +environment variable will no longer work, unless you override it with another +one. For example, the default value of ``confirm`` is literally +``%PYTHON_MANAGER_CONFIRM%``, which will resolve the variable at load time. If +you override the value to ``yes``, then the environment variable will no longer +be used. If you override the value to ``%CONFIRM%``, then that environment +variable will be used instead. + +Configuration settings that are paths are interpreted as relative to the +directory containing the configuration file that specified them. + +.. csv-table:: Administrative configuration options + :header: "Config Key", "Description" + :widths: 1, 4 + + ``base_config``,"The highest priority configuration file to read. Note that + only the built-in configuration file and the registry can modify this + setting. + " + ``user_config``,"The second configuration file to read. + " + ``additional_config``,"The third configuration file to read. + " + ``registry_override_key``,"Registry location to check for overrides. Note + that only the built-in configuration file can modify this setting. + " + ``bundled_dir``,"Read-only directory containing locally cached files. 
+ " + ``install.fallback_source``,"Path or URL to an index to consult when the + main index cannot be accessed. + " + ``install.enable_shortcut_kinds``,"Comma-separated list of shortcut kinds + to allow (e.g. ``""pep514,start""``). Enabled shortcuts may still be disabled + by ``disable_shortcut_kinds``. + " + ``install.disable_shortcut_kinds``,"Comma-separated list of shortcut kinds + to exclude (e.g. ``""pep514,start""``). Disabled shortcuts are not + reactivated by ``enable_shortcut_kinds``. + " + ``pep514_root``,"Registry location to read and write PEP 514 entries into. + By default, :file:`HKEY_CURRENT_USER\\Software\\Python`. + " + ``start_folder``,"Start menu folder to write shortcuts into. By default, + ``Python``. This path is relative to the user's Programs folder. + " + ``virtual_env``,"Path to the active virtual environment. By default, this + is ``%VIRTUAL_ENV%``, but may be set empty to disable venv detection. + " + ``shebang_can_run_anything_silently``,"True to suppress visible warnings + when a shebang launches an application other than a Python runtime. + " + +.. _install-freethreaded-windows: + +Installing Free-threaded Binaries +--------------------------------- + +.. versionadded:: 3.13 (Experimental) + +.. note:: + + Everything described in this section is considered experimental, + and should be expected to change in future releases. + +Pre-built distributions of the experimental free-threaded build are available +by installing tags with the ``t`` suffix. + +.. code:: + + $> py install 3.14t + $> py install 3.14t-arm64 + $> py install 3.14t-32 + +This will install and register as normal. If you have no other runtimes +installed, then ``python`` will launch this one. Otherwise, you will need to use +``py -V:3.14t ...`` or, if you have added the global aliases directory to your +:envvar:`PATH` environment variable, the ``python3.14t.exe`` commands. + +.. 
_pymanager-troubleshoot: + +Troubleshooting +--------------- + +If your Python install manager does not seem to be working correctly, please +work through these tests and fixes to see if it helps. If not, please report an +issue at `our bug tracker `_, +including any relevant log files (written to your :file:`%TEMP%` directory by +default). + +.. csv-table:: Troubleshooting + :header: "Symptom", "Things to try" + :widths: 1, 1 + + "``python`` gives me a ""command not found"" error or opens the Store app + when I type it in my terminal.", "Did you :ref:`install the Python install + manager `? + " + "", "Click Start, open ""Manage app execution aliases"", and check that the + aliases for ""Python (default)"" are enabled. If they already are, try + disabling and re-enabling to refresh the command. The ""Python (default + windowed)"" and ""Python install manager"" commands may also need refreshing. + " + "", "Check that the ``py`` and ``pymanager`` commands work. + " + "``py`` gives me a ""command not found"" error when I type it in my + terminal.","Did you :ref:`install the Python install manager `? + " + "", "Click Start, open ""Manage app execution aliases"", and check that the + aliases for ""Python install manager"" are enabled. If they already are, try + disabling and re-enabling to refresh the command. The ""Python (default + windowed)"" and ""Python install manager"" commands may also need refreshing. + " + "``py`` gives me a ""can't open file"" error when I type commands in my + terminal.", "This usually means you have the legacy launcher installed and it + has priority over the Python install manager. To remove, click Start, open + ""Installed apps"", search for ""Python launcher"" and uninstall it. + " + "``python`` doesn't launch the same runtime as ``py``", "Click Start, open + ""Installed apps"", look for any existing Python runtimes, and either remove + them or Modify and disable the :envvar:`PATH` options. 
+ " + "", "Click Start, open ""Manage app execution aliases"", and check that your + ``python.exe`` alias is set to ""Python (default)"" + " + "``python`` and ``py`` don't launch the runtime I expect", "Check your + ``PYTHON_MANAGER_DEFAULT`` environment variable or ``default_tag`` + configuration. The ``py list`` command will show your default based on these + settings. + " + "", "Installs that are managed by the Python install manager will be chosen + ahead of unmanaged installs. Use ``py install`` to install the runtime you + expect, or configure your default tag. + " + "", "Prerelease and experimental installs that are not managed by the Python + install manager may be chosen ahead of stable releases. Configure your + default tag or uninstall the prerelease runtime and reinstall using ``py + install``. + " + "``pythonw`` or ``pyw`` don't launch the same runtime as ``python`` or + ``py``","Click Start, open ""Manage app execution aliases"", and check that + your ``pythonw.exe`` and ``pyw.exe`` aliases are consistent with your + others. + " + "``pip`` gives me a ""command not found"" error when I type it in my + terminal.","Have you activated a virtual environment? Run the + ``.venv\Scripts\activate`` script in your terminal to activate. + " + "","The package may be available but missing the generated executable. + We recommend using the ``python -m pip`` command instead, or alternatively + the ``python -m pip install --force pip`` command will recreate the + executables and show you the path to add to :envvar:`PATH`. These scripts are + separated for each runtime, and so you may need to add multiple paths. + " + + +.. _windows-embeddable: + +The embeddable package +====================== + +.. versionadded:: 3.5 + +The embedded distribution is a ZIP file containing a minimal Python environment. +It is intended for acting as part of another application, rather than being +directly accessed by end-users. 
+ +To install an embedded distribution, we recommend using ``py install`` with the +``--target`` option: + +.. code:: + + $> py install 3.14-embed --target=runtime + +When extracted, the embedded distribution is (almost) fully isolated from the +user's system, including environment variables, system registry settings, and +installed packages. The standard library is included as pre-compiled and +optimized ``.pyc`` files in a ZIP, and ``python3.dll``, ``python313.dll``, +``python.exe`` and ``pythonw.exe`` are all provided. Tcl/tk (including all +dependents, such as Idle), pip and the Python documentation are not included. + +A default ``._pth`` file is included, which further restricts the default search +paths (as described below in :ref:`windows_finding_modules`). This file is +intended for embedders to modify as necessary. + +Third-party packages should be installed by the application installer alongside +the embedded distribution. Using pip to manage dependencies as for a regular +Python installation is not supported with this distribution, though with some +care it may be possible to include and use pip for automatic updates. In +general, third-party packages should be treated as part of the application +("vendoring") so that the developer can ensure compatibility with newer +versions before providing updates to users. + +The two recommended use cases for this distribution are described below. + +Python Application +------------------ + +An application written in Python does not necessarily require users to be aware +of that fact. The embedded distribution may be used in this case to include a +private version of Python in an install package. Depending on how transparent it +should be (or conversely, how professional it should appear), there are two +options. + +Using a specialized executable as a launcher requires some coding, but provides +the most transparent experience for users. 
With a customized launcher, there are +no obvious indications that the program is running on Python: icons can be +customized, company and version information can be specified, and file +associations behave properly. In most cases, a custom launcher should simply be +able to call ``Py_Main`` with a hard-coded command line. + +The simpler approach is to provide a batch file or generated shortcut that +directly calls the ``python.exe`` or ``pythonw.exe`` with the required +command-line arguments. In this case, the application will appear to be Python +and not its actual name, and users may have trouble distinguishing it from other +running Python processes or file associations. + +With the latter approach, packages should be installed as directories alongside +the Python executable to ensure they are available on the path. With the +specialized launcher, packages can be located in other locations as there is an +opportunity to specify the search path before launching the application. + +Embedding Python +---------------- + +Applications written in native code often require some form of scripting +language, and the embedded Python distribution can be used for this purpose. In +general, the majority of the application is in native code, and some part will +either invoke ``python.exe`` or directly use ``python3.dll``. For either case, +extracting the embedded distribution to a subdirectory of the application +installation is sufficient to provide a loadable Python interpreter. + +As with the application use, packages can be installed to any location as there +is an opportunity to specify search paths before initializing the interpreter. +Otherwise, there are no fundamental differences between using the embedded +distribution and a regular installation. + + +.. _windows-nuget: + +The nuget.org packages +====================== + +..
versionadded:: 3.5.2 + +The nuget.org package is a reduced size Python environment intended for use on +continuous integration and build systems that do not have a system-wide +install of Python. While nuget is "the package manager for .NET", it also works +perfectly fine for packages containing build-time tools. + +Visit `nuget.org `_ for the most up-to-date information +on using nuget. What follows is a summary that is sufficient for Python +developers. + +The ``nuget.exe`` command line tool may be downloaded directly from +``https://aka.ms/nugetclidl``, for example, using curl or PowerShell. With the +tool, the latest version of Python for 64-bit or 32-bit machines is installed +using:: + + nuget.exe install python -ExcludeVersion -OutputDirectory . + nuget.exe install pythonx86 -ExcludeVersion -OutputDirectory . + +To select a particular version, add a ``-Version 3.x.y``. The output directory +may be changed from ``.``, and the package will be installed into a +subdirectory. By default, the subdirectory is named the same as the package, +and without the ``-ExcludeVersion`` option this name will include the specific +version installed. Inside the subdirectory is a ``tools`` directory that +contains the Python installation: + +.. code-block:: doscon + + # Without -ExcludeVersion + > .\python.3.5.2\tools\python.exe -V + Python 3.5.2 + + # With -ExcludeVersion + > .\python\tools\python.exe -V + Python 3.5.2 + +In general, nuget packages are not upgradeable, and newer versions should be +installed side-by-side and referenced using the full path. Alternatively, +delete the package directory manually and install it again. Many CI systems +will do this automatically if they do not preserve files between builds. + +Alongside the ``tools`` directory is a ``build\native`` directory. This +contains a MSBuild properties file ``python.props`` that can be used in a +C++ project to reference the Python install. 
Including the settings will +automatically use the headers and import libraries in your build. + +The package information pages on nuget.org are +`www.nuget.org/packages/python <https://www.nuget.org/packages/python>`_ +for the 64-bit version, `www.nuget.org/packages/pythonx86 +<https://www.nuget.org/packages/pythonx86>`_ for the 32-bit version, and +`www.nuget.org/packages/pythonarm64 +<https://www.nuget.org/packages/pythonarm64>`_ for the ARM64 version. + +Free-threaded packages +---------------------- + +.. versionadded:: 3.13 (Experimental) + +.. note:: + + Everything described in this section is considered experimental, + and should be expected to change in future releases. + +Packages containing free-threaded binaries are named +`python-freethreaded <https://www.nuget.org/packages/python-freethreaded>`_ +for the 64-bit version, `pythonx86-freethreaded +<https://www.nuget.org/packages/pythonx86-freethreaded>`_ for the 32-bit +version, and `pythonarm64-freethreaded +<https://www.nuget.org/packages/pythonarm64-freethreaded>`_ for the ARM64 +version. These packages contain both the ``python3.13t.exe`` and +``python.exe`` entry points, both of which run free threaded. + + +Alternative bundles +=================== + +Besides the standard CPython distribution, there are modified packages including +additional functionality. The following is a list of popular versions and their +key features: + +`ActivePython `_ + Installer with multi-platform compatibility, documentation, PyWin32 + +`Anaconda `_ + Popular scientific modules (such as numpy, scipy and pandas) and the + ``conda`` package manager. + +`Enthought Deployment Manager `_ + "The Next Generation Python Environment and Package Manager". + + Previously Enthought provided Canopy, but it `reached end of life in 2016 + `_. + +`WinPython `_ + Windows-specific distribution with prebuilt scientific packages and + tools for building packages. + +Note that these packages may not include the latest versions of Python or +other libraries, and are not maintained or supported by the core Python team. + + +Supported Windows versions +========================== As specified in :pep:`11`, a Python release only supports a Windows platform while Microsoft considers the platform under extended support.
This means that @@ -27,30 +979,246 @@ Python |version| supports Windows 10 and newer. If you require Windows 7 support, please install Python 3.8. If you require Windows 8.1 support, please install Python 3.12. -There are a number of different installers available for Windows, each with -certain benefits and downsides. -:ref:`windows-full` contains all components and is the best option for -developers using Python for any kind of project. +.. _max-path: -:ref:`windows-store` is a simple installation of Python that is suitable for -running scripts and packages, and using IDLE or other development environments. -It requires Windows 10 and above, but can be safely installed without corrupting other -programs. It also provides many convenient commands for launching Python and -its tools. +Removing the MAX_PATH Limitation +================================ -:ref:`windows-nuget` are lightweight installations intended for continuous -integration systems. It can be used to build Python packages or run scripts, -but is not updateable and has no user interface tools. +Windows historically has limited path lengths to 260 characters. This meant that +paths longer than this would not resolve and errors would result. + +In the latest versions of Windows, this limitation can be expanded to over +32,000 characters. Your administrator will need to activate the "Enable Win32 +long paths" group policy, or set ``LongPathsEnabled`` to ``1`` in the registry +key ``HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem``. + +This allows the :func:`open` function, the :mod:`os` module and most other +path functionality to accept and return paths longer than 260 characters. + +After changing the above option and rebooting, no further configuration is +required. + + +.. _win-utf8-mode: + +UTF-8 mode +========== + +.. versionadded:: 3.7 + +Windows still uses legacy encodings for the system encoding (the ANSI Code +Page). Python uses it for the default encoding of text files (e.g. 
+:func:`locale.getencoding`). + +This may cause issues because UTF-8 is widely used on the internet +and most Unix systems, including WSL (Windows Subsystem for Linux). + +You can use the :ref:`Python UTF-8 Mode ` to change the default text +encoding to UTF-8. You can enable the :ref:`Python UTF-8 Mode ` via +the ``-X utf8`` command line option, or the ``PYTHONUTF8=1`` environment +variable. See :envvar:`PYTHONUTF8` for enabling UTF-8 mode, and +:ref:`setting-envvars` for how to modify environment variables. + +When the :ref:`Python UTF-8 Mode ` is enabled, you can still use the +system encoding (the ANSI Code Page) via the "mbcs" codec. + +Note that adding ``PYTHONUTF8=1`` to the default environment variables +will affect all Python 3.7+ applications on your system. +If you have any Python 3.7+ applications which rely on the legacy +system encoding, it is recommended to set the environment variable +temporarily or use the ``-X utf8`` command line option. + +.. note:: + Even when UTF-8 mode is disabled, Python uses UTF-8 by default + on Windows for: + + * Console I/O including standard I/O (see :pep:`528` for details). + * The :term:`filesystem encoding ` + (see :pep:`529` for details). + + +.. _windows_finding_modules: + +Finding modules +=============== + +These notes supplement the description at :ref:`sys-path-init` with +detailed Windows notes. + +When no ``._pth`` file is found, this is how :data:`sys.path` is populated on +Windows: + +* An empty entry is added at the start, which corresponds to the current + directory. + +* If the environment variable :envvar:`PYTHONPATH` exists, as described in + :ref:`using-on-envvars`, its entries are added next. Note that on Windows, + paths in this variable must be separated by semicolons, to distinguish them + from the colon used in drive identifiers (``C:\`` etc.). 
+ +* Additional "application paths" can be added in the registry as subkeys of + :samp:`\\SOFTWARE\\Python\\PythonCore\\{version}\\PythonPath` under both the + ``HKEY_CURRENT_USER`` and ``HKEY_LOCAL_MACHINE`` hives. Subkeys which have + semicolon-delimited path strings as their default value will cause each path + to be added to :data:`sys.path`. (Note that all known installers only use + HKLM, so HKCU is typically empty.) + +* If the environment variable :envvar:`PYTHONHOME` is set, it is assumed as + "Python Home". Otherwise, the path of the main Python executable is used to + locate a "landmark file" (either ``Lib\os.py`` or ``pythonXY.zip``) to deduce + the "Python Home". If a Python home is found, the relevant sub-directories + added to :data:`sys.path` (``Lib``, ``plat-win``, etc) are based on that + folder. Otherwise, the core Python path is constructed from the PythonPath + stored in the registry. + +* If the Python Home cannot be located, no :envvar:`PYTHONPATH` is specified in + the environment, and no registry entries can be found, a default path with + relative entries is used (e.g. ``.\Lib;.\plat-win``, etc). + +If a ``pyvenv.cfg`` file is found alongside the main executable or in the +directory one level above the executable, the following variations apply: + +* If ``home`` is an absolute path and :envvar:`PYTHONHOME` is not set, this + path is used instead of the path to the main executable when deducing the + home location. + +The end result of all this is: + +* When running :file:`python.exe`, or any other .exe in the main Python + directory (either an installed version, or directly from the PCbuild + directory), the core path is deduced, and the core paths in the registry are + ignored. Other "application paths" in the registry are always read. + +* When Python is hosted in another .exe (different directory, embedded via COM, + etc), the "Python Home" will not be deduced, so the core path from the + registry is used. 
Other "application paths" in the registry are always read. + +* If Python can't find its home and there are no registry values (frozen .exe, + some very strange installation setup), you get a path with some default, but + relative, paths. + +For those who want to bundle Python into their application or distribution, the +following advice will prevent conflicts with other installations: + +* Include a ``._pth`` file alongside your executable containing the + directories to include. This will ignore paths listed in the registry and + environment variables, and also ignore :mod:`site` unless ``import site`` is + listed. + +* If you are loading :file:`python3.dll` or :file:`python37.dll` in your own + executable, explicitly set :c:member:`PyConfig.module_search_paths` before + :c:func:`Py_InitializeFromConfig`. + +* Clear and/or overwrite :envvar:`PYTHONPATH` and set :envvar:`PYTHONHOME` + before launching :file:`python.exe` from your application. + +* If you cannot use the previous suggestions (for example, you are a + distribution that allows people to run :file:`python.exe` directly), ensure + that the landmark file (:file:`Lib\\os.py`) exists in your install directory. + (Note that it will not be detected inside a ZIP file, but a correctly named + ZIP file will be detected instead.) + +These will ensure that the files in a system-wide installation will not take +precedence over the copy of the standard library bundled with your application. +Otherwise, your users may experience problems using your application. Note that +the first suggestion is the best, as the others may still be susceptible to +non-standard paths in the registry and user site-packages. + +.. versionchanged:: 3.6 + + Added ``._pth`` file support and removed the ``applocal`` option from + ``pyvenv.cfg``. + +.. versionchanged:: 3.6 + + Added :file:`python{XX}.zip` as a potential landmark when directly adjacent + to the executable. + +..
deprecated:: 3.6 + + Modules specified in the registry under ``Modules`` (not ``PythonPath``) + may be imported by :class:`importlib.machinery.WindowsRegistryFinder`. + This finder is enabled on Windows in 3.6.0 and earlier, but may need to + be explicitly added to :data:`sys.meta_path` in the future. + +Additional modules +================== + +Even though Python aims to be portable among all platforms, there are features +that are unique to Windows. A number of modules, both in the standard library +and external, as well as snippets, exist to use these features. + +The Windows-specific standard modules are documented in +:ref:`mswin-specific-services`. + +PyWin32 +------- + +The :pypi:`PyWin32` module by Mark Hammond +is a collection of modules for advanced Windows-specific support. This includes +utilities for: + +* `Component Object Model + `_ + (COM) +* Win32 API calls +* Registry +* Event log +* `Microsoft Foundation Classes + `_ + (MFC) user interfaces + +`PythonWin `_ is a sample MFC application +shipped with PyWin32. It is an embeddable IDE with a built-in debugger. + +.. seealso:: + + `Win32 How Do I...? `_ + by Tim Golden + + `Python and COM `_ + by David and Paul Boddie + + +cx_Freeze +--------- + +`cx_Freeze `_ +wraps Python scripts into executable Windows programs +(:file:`{*}.exe` files). When you have done this, you can distribute your +application without requiring your users to install Python. + + +Compiling Python on Windows +=========================== + +If you want to compile CPython yourself, the first thing you should do is get the +`source `_. You can download either the +latest release's source or just grab a fresh `checkout +`_. + +The source tree contains a build solution and project files for Microsoft +Visual Studio, which is the compiler used to build the official Python +releases. These files are in the :file:`PCbuild` directory. + +Check :file:`PCbuild/readme.txt` for general information on the build process.
+ +For extension modules, consult :ref:`building-on-windows`. -:ref:`windows-embeddable` is a minimal package of Python suitable for -embedding into a larger application. .. _windows-full: -The full installer -================== +The full installer (deprecated) +=============================== + +.. deprecated:: 3.14 + + This installer is deprecated since 3.14 and will not be produced for Python + 3.16 or later. See :ref:`pymanager` for the modern installer. + Installation steps ------------------ @@ -94,7 +1262,6 @@ installation". In this case: * If selected, the install directory will be added to the system :envvar:`PATH` * Shortcuts are available for all users -.. _max-path: Removing the MAX_PATH Limitation -------------------------------- @@ -308,7 +1475,6 @@ settings and replace any that have been removed or modified. "Uninstall" will remove Python entirely, with the exception of the :ref:`launcher`, which has its own entry in Programs and Features. -.. _install-freethreaded-windows: Installing Free-threaded Binaries --------------------------------- @@ -349,416 +1515,15 @@ builds. Free-threaded binaries are also available :ref:`on nuget.org `. -.. _windows-store: -The Microsoft Store package -=========================== +Python Launcher for Windows (Deprecated) +======================================== -.. versionadded:: 3.7.2 +.. deprecated:: 3.14 -The Microsoft Store package is an easily installable Python interpreter that -is intended mainly for interactive use, for example, by students. - -To install the package, ensure you have the latest Windows 10 updates and -search the Microsoft Store app for "Python |version|". Ensure that the app -you select is published by the Python Software Foundation, and install it. - -.. warning:: - Python will always be available for free on the Microsoft Store. If you - are asked to pay for it, you have not selected the correct package. - -After installation, Python may be launched by finding it in Start. 
-Alternatively, it will be available from any Command Prompt or PowerShell -session by typing ``python``. Further, pip and IDLE may be used by typing -``pip`` or ``idle``. IDLE can also be found in Start. - -All three commands are also available with version number suffixes, for -example, as ``python3.exe`` and ``python3.x.exe`` as well as -``python.exe`` (where ``3.x`` is the specific version you want to launch, -such as |version|). Open "Manage App Execution Aliases" through Start to -select which version of Python is associated with each command. It is -recommended to make sure that ``pip`` and ``idle`` are consistent with -whichever version of ``python`` is selected. - -Virtual environments can be created with ``python -m venv`` and activated -and used as normal. - -If you have installed another version of Python and added it to your -``PATH`` variable, it will be available as ``python.exe`` rather than the -one from the Microsoft Store. To access the new installation, use -``python3.exe`` or ``python3.x.exe``. - -The ``py.exe`` launcher will detect this Python installation, but will prefer -installations from the traditional installer. - -To remove Python, open Settings and use Apps and Features, or else find -Python in Start and right-click to select Uninstall. Uninstalling will -remove all packages you installed directly into this Python installation, but -will not remove any virtual environments - -Known issues ------------- - -Redirection of local data, registry, and temporary paths -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Because of restrictions on Microsoft Store apps, Python scripts may not have -full write access to shared locations such as :envvar:`TEMP` and the registry. -Instead, it will write to a private copy. If your scripts must modify the -shared locations, you will need to install the full installer. - -At runtime, Python will use a private copy of well-known Windows folders and the registry. 
-For example, if the environment variable :envvar:`%APPDATA%` is :file:`c:\\Users\\\\AppData\\`, -then when writing to :file:`C:\\Users\\\\AppData\\Local` will write to -:file:`C:\\Users\\\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\\LocalCache\\Local\\`. - -When reading files, Windows will return the file from the private folder, or if that does not exist, the -real Windows directory. For example reading :file:`C:\\Windows\\System32` returns the contents of :file:`C:\\Windows\\System32` -plus the contents of :file:`C:\\Program Files\\WindowsApps\\package_name\\VFS\\SystemX86`. - -You can find the real path of any existing file using :func:`os.path.realpath`: - -.. code-block:: python - - >>> import os - >>> test_file = 'C:\\Users\\example\\AppData\\Local\\test.txt' - >>> os.path.realpath(test_file) - 'C:\\Users\\example\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\\LocalCache\\Local\\test.txt' - -When writing to the Windows Registry, the following behaviors exist: - -* Reading from ``HKLM\\Software`` is allowed and results are merged with the :file:`registry.dat` file in the package. -* Writing to ``HKLM\\Software`` is not allowed if the corresponding key/value exists, i.e. modifying existing keys. -* Writing to ``HKLM\\Software`` is allowed as long as a corresponding key/value does not exist in the package - and the user has the correct access permissions. - -For more detail on the technical basis for these limitations, please consult -Microsoft's documentation on packaged full-trust apps, currently available at -`docs.microsoft.com/en-us/windows/msix/desktop/desktop-to-uwp-behind-the-scenes -`_ - - -.. _windows-nuget: - -The nuget.org packages -====================== - -.. versionadded:: 3.5.2 - -The nuget.org package is a reduced size Python environment intended for use on -continuous integration and build systems that do not have a system-wide -install of Python. 
While nuget is "the package manager for .NET", it also works -perfectly fine for packages containing build-time tools. - -Visit `nuget.org `_ for the most up-to-date information -on using nuget. What follows is a summary that is sufficient for Python -developers. - -The ``nuget.exe`` command line tool may be downloaded directly from -``https://aka.ms/nugetclidl``, for example, using curl or PowerShell. With the -tool, the latest version of Python for 64-bit or 32-bit machines is installed -using:: - - nuget.exe install python -ExcludeVersion -OutputDirectory . - nuget.exe install pythonx86 -ExcludeVersion -OutputDirectory . - -To select a particular version, add a ``-Version 3.x.y``. The output directory -may be changed from ``.``, and the package will be installed into a -subdirectory. By default, the subdirectory is named the same as the package, -and without the ``-ExcludeVersion`` option this name will include the specific -version installed. Inside the subdirectory is a ``tools`` directory that -contains the Python installation: - -.. code-block:: doscon - - # Without -ExcludeVersion - > .\python.3.5.2\tools\python.exe -V - Python 3.5.2 - - # With -ExcludeVersion - > .\python\tools\python.exe -V - Python 3.5.2 - -In general, nuget packages are not upgradeable, and newer versions should be -installed side-by-side and referenced using the full path. Alternatively, -delete the package directory manually and install it again. Many CI systems -will do this automatically if they do not preserve files between builds. - -Alongside the ``tools`` directory is a ``build\native`` directory. This -contains a MSBuild properties file ``python.props`` that can be used in a -C++ project to reference the Python install. Including the settings will -automatically use the headers and import libraries in your build. 
- -The package information pages on nuget.org are -`www.nuget.org/packages/python `_ -for the 64-bit version, `www.nuget.org/packages/pythonx86 -`_ for the 32-bit version, and -`www.nuget.org/packages/pythonarm64 -`_ for the ARM64 version - -Free-threaded packages ----------------------- - -.. versionadded:: 3.13 (Experimental) - -.. note:: - - Everything described in this section is considered experimental, - and should be expected to change in future releases. - -Packages containing free-threaded binaries are named -`python-freethreaded `_ -for the 64-bit version, `pythonx86-freethreaded -`_ for the 32-bit -version, and `pythonarm64-freethreaded -`_ for the ARM64 -version. These packages contain both the ``python3.13t.exe`` and -``python.exe`` entry points, both of which run free threaded. - -.. _windows-embeddable: - -The embeddable package -====================== - -.. versionadded:: 3.5 - -The embedded distribution is a ZIP file containing a minimal Python environment. -It is intended for acting as part of another application, rather than being -directly accessed by end-users. - -When extracted, the embedded distribution is (almost) fully isolated from the -user's system, including environment variables, system registry settings, and -installed packages. The standard library is included as pre-compiled and -optimized ``.pyc`` files in a ZIP, and ``python3.dll``, ``python37.dll``, -``python.exe`` and ``pythonw.exe`` are all provided. Tcl/tk (including all -dependents, such as Idle), pip and the Python documentation are not included. - -.. note:: - - The embedded distribution does not include the `Microsoft C Runtime - `_ and it is - the responsibility of the application installer to provide this. The - runtime may have already been installed on a user's system previously or - automatically via Windows Update, and can be detected by finding - ``ucrtbase.dll`` in the system directory. 
- -Third-party packages should be installed by the application installer alongside -the embedded distribution. Using pip to manage dependencies as for a regular -Python installation is not supported with this distribution, though with some -care it may be possible to include and use pip for automatic updates. In -general, third-party packages should be treated as part of the application -("vendoring") so that the developer can ensure compatibility with newer -versions before providing updates to users. - -The two recommended use cases for this distribution are described below. - -Python Application ------------------- - -An application written in Python does not necessarily require users to be aware -of that fact. The embedded distribution may be used in this case to include a -private version of Python in an install package. Depending on how transparent it -should be (or conversely, how professional it should appear), there are two -options. - -Using a specialized executable as a launcher requires some coding, but provides -the most transparent experience for users. With a customized launcher, there are -no obvious indications that the program is running on Python: icons can be -customized, company and version information can be specified, and file -associations behave properly. In most cases, a custom launcher should simply be -able to call ``Py_Main`` with a hard-coded command line. - -The simpler approach is to provide a batch file or generated shortcut that -directly calls the ``python.exe`` or ``pythonw.exe`` with the required -command-line arguments. In this case, the application will appear to be Python -and not its actual name, and users may have trouble distinguishing it from other -running Python processes or file associations. - -With the latter approach, packages should be installed as directories alongside -the Python executable to ensure they are available on the path. 
With the -specialized launcher, packages can be located in other locations as there is an -opportunity to specify the search path before launching the application. - -Embedding Python ----------------- - -Applications written in native code often require some form of scripting -language, and the embedded Python distribution can be used for this purpose. In -general, the majority of the application is in native code, and some part will -either invoke ``python.exe`` or directly use ``python3.dll``. For either case, -extracting the embedded distribution to a subdirectory of the application -installation is sufficient to provide a loadable Python interpreter. - -As with the application use, packages can be installed to any location as there -is an opportunity to specify search paths before initializing the interpreter. -Otherwise, there is no fundamental differences between using the embedded -distribution and a regular installation. - - -Alternative bundles -=================== - -Besides the standard CPython distribution, there are modified packages including -additional functionality. The following is a list of popular versions and their -key features: - -`ActivePython `_ - Installer with multi-platform compatibility, documentation, PyWin32 - -`Anaconda `_ - Popular scientific modules (such as numpy, scipy and pandas) and the - ``conda`` package manager. - -`Enthought Deployment Manager `_ - "The Next Generation Python Environment and Package Manager". - - Previously Enthought provided Canopy, but it `reached end of life in 2016 - `_. - -`WinPython `_ - Windows-specific distribution with prebuilt scientific packages and - tools for building packages. - -Note that these packages may not include the latest versions of Python or -other libraries, and are not maintained or supported by the core Python team. 
- - - -Configuring Python -================== - -To run Python conveniently from a command prompt, you might consider changing -some default environment variables in Windows. While the installer provides an -option to configure the PATH and PATHEXT variables for you, this is only -reliable for a single, system-wide installation. If you regularly use multiple -versions of Python, consider using the :ref:`launcher`. - - -.. _setting-envvars: - -Excursus: Setting environment variables ---------------------------------------- - -Windows allows environment variables to be configured permanently at both the -User level and the System level, or temporarily in a command prompt. - -To temporarily set environment variables, open Command Prompt and use the -:command:`set` command: - -.. code-block:: doscon - - C:\>set PATH=C:\Program Files\Python 3.9;%PATH% - C:\>set PYTHONPATH=%PYTHONPATH%;C:\My_python_lib - C:\>python - -These changes will apply to any further commands executed in that console, and -will be inherited by any applications started from the console. - -Including the variable name within percent signs will expand to the existing -value, allowing you to add your new value at either the start or the end. -Modifying :envvar:`PATH` by adding the directory containing -:program:`python.exe` to the start is a common way to ensure the correct version -of Python is launched. - -To permanently modify the default environment variables, click Start and search -for 'edit environment variables', or open System properties, :guilabel:`Advanced -system settings` and click the :guilabel:`Environment Variables` button. -In this dialog, you can add or modify User and System variables. To change -System variables, you need non-restricted access to your machine -(i.e. Administrator rights). - -.. note:: - - Windows will concatenate User variables *after* System variables, which may - cause unexpected results when modifying :envvar:`PATH`. 
- - The :envvar:`PYTHONPATH` variable is used by all versions of Python, - so you should not permanently configure it unless the listed paths - only include code that is compatible with all of your installed Python - versions. - -.. seealso:: - - https://learn.microsoft.com/windows/win32/procthread/environment-variables - Overview of environment variables on Windows - - https://learn.microsoft.com/windows-server/administration/windows-commands/set_1 - The ``set`` command, for temporarily modifying environment variables - - https://learn.microsoft.com/windows-server/administration/windows-commands/setx - The ``setx`` command, for permanently modifying environment variables - - -.. _windows-path-mod: - -Finding the Python executable ------------------------------ - -.. versionchanged:: 3.5 - -Besides using the automatically created start menu entry for the Python -interpreter, you might want to start Python in the command prompt. The -installer has an option to set that up for you. - -On the first page of the installer, an option labelled "Add Python to PATH" -may be selected to have the installer add the install location into the -:envvar:`PATH`. The location of the :file:`Scripts\\` folder is also added. -This allows you to type :command:`python` to run the interpreter, and -:command:`pip` for the package installer. Thus, you can also execute your -scripts with command line options, see :ref:`using-on-cmdline` documentation. - -If you don't enable this option at install time, you can always re-run the -installer, select Modify, and enable it. Alternatively, you can manually -modify the :envvar:`PATH` using the directions in :ref:`setting-envvars`. You -need to set your :envvar:`PATH` environment variable to include the directory -of your Python installation, delimited by a semicolon from other entries. An -example variable could look like this (assuming the first two entries already -existed):: - - C:\WINDOWS\system32;C:\WINDOWS;C:\Program Files\Python 3.9 - -.. 
_win-utf8-mode: - -UTF-8 mode -========== - -.. versionadded:: 3.7 - -Windows still uses legacy encodings for the system encoding (the ANSI Code -Page). Python uses it for the default encoding of text files (e.g. -:func:`locale.getencoding`). - -This may cause issues because UTF-8 is widely used on the internet -and most Unix systems, including WSL (Windows Subsystem for Linux). - -You can use the :ref:`Python UTF-8 Mode ` to change the default text -encoding to UTF-8. You can enable the :ref:`Python UTF-8 Mode ` via -the ``-X utf8`` command line option, or the ``PYTHONUTF8=1`` environment -variable. See :envvar:`PYTHONUTF8` for enabling UTF-8 mode, and -:ref:`setting-envvars` for how to modify environment variables. - -When the :ref:`Python UTF-8 Mode ` is enabled, you can still use the -system encoding (the ANSI Code Page) via the "mbcs" codec. - -Note that adding ``PYTHONUTF8=1`` to the default environment variables -will affect all Python 3.7+ applications on your system. -If you have any Python 3.7+ applications which rely on the legacy -system encoding, it is recommended to set the environment variable -temporarily or use the ``-X utf8`` command line option. - -.. note:: - Even when UTF-8 mode is disabled, Python uses UTF-8 by default - on Windows for: - - * Console I/O including standard I/O (see :pep:`528` for details). - * The :term:`filesystem encoding ` - (see :pep:`529` for details). - - -.. _launcher: - -Python Launcher for Windows -=========================== + The launcher and this documentation have been superseded by the Python + Install Manager described above. This is preserved temporarily for historical + interest. .. versionadded:: 3.3 @@ -1168,190 +1933,3 @@ listed in alphabetical order of names. | | | found. | +-------------------+-------+-----------------------------------------------+ - -.. 
_windows_finding_modules: - -Finding modules -=============== - -These notes supplement the description at :ref:`sys-path-init` with -detailed Windows notes. - -When no ``._pth`` file is found, this is how :data:`sys.path` is populated on -Windows: - -* An empty entry is added at the start, which corresponds to the current - directory. - -* If the environment variable :envvar:`PYTHONPATH` exists, as described in - :ref:`using-on-envvars`, its entries are added next. Note that on Windows, - paths in this variable must be separated by semicolons, to distinguish them - from the colon used in drive identifiers (``C:\`` etc.). - -* Additional "application paths" can be added in the registry as subkeys of - :samp:`\\SOFTWARE\\Python\\PythonCore\\{version}\\PythonPath` under both the - ``HKEY_CURRENT_USER`` and ``HKEY_LOCAL_MACHINE`` hives. Subkeys which have - semicolon-delimited path strings as their default value will cause each path - to be added to :data:`sys.path`. (Note that all known installers only use - HKLM, so HKCU is typically empty.) - -* If the environment variable :envvar:`PYTHONHOME` is set, it is assumed as - "Python Home". Otherwise, the path of the main Python executable is used to - locate a "landmark file" (either ``Lib\os.py`` or ``pythonXY.zip``) to deduce - the "Python Home". If a Python home is found, the relevant sub-directories - added to :data:`sys.path` (``Lib``, ``plat-win``, etc) are based on that - folder. Otherwise, the core Python path is constructed from the PythonPath - stored in the registry. - -* If the Python Home cannot be located, no :envvar:`PYTHONPATH` is specified in - the environment, and no registry entries can be found, a default path with - relative entries is used (e.g. ``.\Lib;.\plat-win``, etc). 
- -If a ``pyvenv.cfg`` file is found alongside the main executable or in the -directory one level above the executable, the following variations apply: - -* If ``home`` is an absolute path and :envvar:`PYTHONHOME` is not set, this - path is used instead of the path to the main executable when deducing the - home location. - -The end result of all this is: - -* When running :file:`python.exe`, or any other .exe in the main Python - directory (either an installed version, or directly from the PCbuild - directory), the core path is deduced, and the core paths in the registry are - ignored. Other "application paths" in the registry are always read. - -* When Python is hosted in another .exe (different directory, embedded via COM, - etc), the "Python Home" will not be deduced, so the core path from the - registry is used. Other "application paths" in the registry are always read. - -* If Python can't find its home and there are no registry value (frozen .exe, - some very strange installation setup) you get a path with some default, but - relative, paths. - -For those who want to bundle Python into their application or distribution, the -following advice will prevent conflicts with other installations: - -* Include a ``._pth`` file alongside your executable containing the - directories to include. This will ignore paths listed in the registry and - environment variables, and also ignore :mod:`site` unless ``import site`` is - listed. - -* If you are loading :file:`python3.dll` or :file:`python37.dll` in your own - executable, explicitly set :c:member:`PyConfig.module_search_paths` before - :c:func:`Py_InitializeFromConfig`. - -* Clear and/or overwrite :envvar:`PYTHONPATH` and set :envvar:`PYTHONHOME` - before launching :file:`python.exe` from your application. 
- -* If you cannot use the previous suggestions (for example, you are a - distribution that allows people to run :file:`python.exe` directly), ensure - that the landmark file (:file:`Lib\\os.py`) exists in your install directory. - (Note that it will not be detected inside a ZIP file, but a correctly named - ZIP file will be detected instead.) - -These will ensure that the files in a system-wide installation will not take -precedence over the copy of the standard library bundled with your application. -Otherwise, your users may experience problems using your application. Note that -the first suggestion is the best, as the others may still be susceptible to -non-standard paths in the registry and user site-packages. - -.. versionchanged:: 3.6 - - Add ``._pth`` file support and removes ``applocal`` option from - ``pyvenv.cfg``. - -.. versionchanged:: 3.6 - - Add :file:`python{XX}.zip` as a potential landmark when directly adjacent - to the executable. - -.. deprecated:: 3.6 - - Modules specified in the registry under ``Modules`` (not ``PythonPath``) - may be imported by :class:`importlib.machinery.WindowsRegistryFinder`. - This finder is enabled on Windows in 3.6.0 and earlier, but may need to - be explicitly added to :data:`sys.meta_path` in the future. - -Additional modules -================== - -Even though Python aims to be portable among all platforms, there are features -that are unique to Windows. A couple of modules, both in the standard library -and external, and snippets exist to use these features. - -The Windows-specific standard modules are documented in -:ref:`mswin-specific-services`. - -PyWin32 -------- - -The :pypi:`PyWin32` module by Mark Hammond -is a collection of modules for advanced Windows-specific support. 
This includes -utilities for: - -* `Component Object Model - `_ - (COM) -* Win32 API calls -* Registry -* Event log -* `Microsoft Foundation Classes - `_ - (MFC) user interfaces - -`PythonWin `_ is a sample MFC application -shipped with PyWin32. It is an embeddable IDE with a built-in debugger. - -.. seealso:: - - `Win32 How Do I...? `_ - by Tim Golden - - `Python and COM `_ - by David and Paul Boddie - - -cx_Freeze ---------- - -`cx_Freeze `_ -wraps Python scripts into executable Windows programs -(:file:`{*}.exe` files). When you have done this, you can distribute your -application without requiring your users to install Python. - - -Compiling Python on Windows -=========================== - -If you want to compile CPython yourself, first thing you should do is get the -`source `_. You can download either the -latest release's source or just grab a fresh `checkout -`_. - -The source tree contains a build solution and project files for Microsoft -Visual Studio, which is the compiler used to build the official Python -releases. These files are in the :file:`PCbuild` directory. - -Check :file:`PCbuild/readme.txt` for general information on the build process. - -For extension modules, consult :ref:`building-on-windows`. - - -Other Platforms -=============== - -With ongoing development of Python, some platforms that used to be supported -earlier are no longer supported (due to the lack of users or developers). -Check :pep:`11` for details on all unsupported platforms. - -* `Windows CE `_ is - `no longer supported `__ - since Python 3 (if it ever was). -* The `Cygwin `_ installer offers to install the - `Python interpreter `__ - as well - -See `Python for Windows `_ -for detailed information about platforms with pre-compiled installers. 
diff --git a/Doc/whatsnew/2.6.rst b/Doc/whatsnew/2.6.rst index fdccfb7deb1..0803eba99e6 100644 --- a/Doc/whatsnew/2.6.rst +++ b/Doc/whatsnew/2.6.rst @@ -1747,7 +1747,7 @@ Interpreter Changes ------------------------------- Two command-line options have been reserved for use by other Python -implementations. The :option:`-J` switch has been reserved for use by +implementations. The :option:`!-J` switch has been reserved for use by Jython for Jython-specific options, such as switches that are passed to the underlying JVM. :option:`-X` has been reserved for options specific to a particular implementation of Python such as CPython, @@ -3043,7 +3043,7 @@ Changes to Python's build process and to the C API include: * Importing modules simultaneously in two different threads no longer deadlocks; it will now raise an :exc:`ImportError`. A new API - function, :c:func:`PyImport_ImportModuleNoBlock`, will look for a + function, :c:func:`!PyImport_ImportModuleNoBlock`, will look for a module in ``sys.modules`` first, then try to import it after acquiring an import lock. If the import lock is held by another thread, an :exc:`ImportError` is raised. diff --git a/Doc/whatsnew/3.0.rst b/Doc/whatsnew/3.0.rst index 6e1fda22ed2..d858586138e 100644 --- a/Doc/whatsnew/3.0.rst +++ b/Doc/whatsnew/3.0.rst @@ -870,7 +870,7 @@ to the C API. * :c:func:`!PyNumber_Coerce`, :c:func:`!PyNumber_CoerceEx`, :c:func:`!PyMember_Get`, and :c:func:`!PyMember_Set` C APIs are removed. -* New C API :c:func:`PyImport_ImportModuleNoBlock`, works like +* New C API :c:func:`!PyImport_ImportModuleNoBlock`, works like :c:func:`PyImport_ImportModule` but won't block on the import lock (returning an error instead). 
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 3c815721a92..1067601c652 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -551,11 +551,12 @@ Patterns and classes If you are using classes to structure your data, you can use as a pattern the class name followed by an argument list resembling a constructor. This -pattern has the ability to capture class attributes into variables:: +pattern has the ability to capture instance attributes into variables:: class Point: - x: int - y: int + def __init__(self, x, y): + self.x = x + self.y = y def location(point): match point: @@ -2176,9 +2177,9 @@ Porting to Python 3.10 ``unicodedata.ucnhash_CAPI`` has been moved to the internal C API. (Contributed by Victor Stinner in :issue:`42157`.) -* :c:func:`Py_GetPath`, :c:func:`Py_GetPrefix`, :c:func:`Py_GetExecPrefix`, - :c:func:`Py_GetProgramFullPath`, :c:func:`Py_GetPythonHome` and - :c:func:`Py_GetProgramName` functions now return ``NULL`` if called before +* :c:func:`!Py_GetPath`, :c:func:`!Py_GetPrefix`, :c:func:`!Py_GetExecPrefix`, + :c:func:`!Py_GetProgramFullPath`, :c:func:`!Py_GetPythonHome` and + :c:func:`!Py_GetProgramName` functions now return ``NULL`` if called before :c:func:`Py_Initialize` (before Python is initialized). Use the new :ref:`init-config` API to get the :ref:`init-path-config`. (Contributed by Victor Stinner in :issue:`42260`.) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index afbc7a1c16f..a65f59c0a72 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1347,6 +1347,8 @@ Deprecated .. include:: ../deprecations/pending-removal-in-3.16.rst +.. include:: ../deprecations/pending-removal-in-3.17.rst + .. include:: ../deprecations/pending-removal-in-future.rst .. _whatsnew312-removed: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 2090759d3c4..580a3d8154d 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -730,6 +730,22 @@ asyncio never awaited). 
(Contributed by Arthur Tacca and Jason Zhang in :gh:`115957`.) +* The function and methods named ``create_task`` have received a new + ``**kwargs`` argument that is passed through to the task constructor. + This change was accidentally added in 3.13.3, + and broke the API contract for custom task factories. + Several third-party task factories implemented workarounds for this. + In 3.13.4 and later releases the old factory contract is honored + once again (until 3.14). + To keep the workarounds working, the extra ``**kwargs`` argument still + allows passing additional keyword arguments to :class:`~asyncio.Task` + and to custom task factories. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) base64 ------ @@ -1871,7 +1887,7 @@ New Deprecations * :mod:`http.server`: - * Deprecate :class:`~http.server.CGIHTTPRequestHandler`, + * Deprecate :class:`!CGIHTTPRequestHandler`, to be removed in Python 3.15. Process-based CGI HTTP servers have been out of favor for a very long time. This code was outdated, unmaintained, and rarely used. @@ -1908,7 +1924,7 @@ New Deprecations * :mod:`platform`: - * Deprecate :func:`~platform.java_ver`, + * Deprecate :func:`!platform.java_ver`, to be removed in Python 3.15. This function is only useful for Jython support, has a confusing API, and is largely untested. @@ -1995,8 +2011,7 @@ New Deprecations * :mod:`wave`: - * Deprecate the :meth:`~wave.Wave_read.getmark`, :meth:`!setmark`, - and :meth:`~wave.Wave_read.getmarkers` methods of + * Deprecate the ``getmark()``, ``setmark()`` and ``getmarkers()`` methods of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes, to be removed in Python 3.15. (Contributed by Victor Stinner in :gh:`105096`.) @@ -2009,6 +2024,8 @@ New Deprecations .. include:: ../deprecations/pending-removal-in-3.16.rst +.. 
include:: ../deprecations/pending-removal-in-3.17.rst + .. include:: ../deprecations/pending-removal-in-future.rst CPython Bytecode Changes @@ -2475,17 +2492,17 @@ Deprecated C APIs * :c:func:`PySys_ResetWarnOptions`: Clear :data:`sys.warnoptions` and :data:`!warnings.filters` instead. - * :c:func:`Py_GetExecPrefix`: + * :c:func:`!Py_GetExecPrefix`: Get :data:`sys.exec_prefix` instead. - * :c:func:`Py_GetPath`: + * :c:func:`!Py_GetPath`: Get :data:`sys.path` instead. - * :c:func:`Py_GetPrefix`: + * :c:func:`!Py_GetPrefix`: Get :data:`sys.prefix` instead. - * :c:func:`Py_GetProgramFullPath`: + * :c:func:`!Py_GetProgramFullPath`: Get :data:`sys.executable` instead. - * :c:func:`Py_GetProgramName`: + * :c:func:`!Py_GetProgramName`: Get :data:`sys.executable` instead. - * :c:func:`Py_GetPythonHome`: + * :c:func:`!Py_GetPythonHome`: Get :c:member:`PyConfig.home` or the :envvar:`PYTHONHOME` environment variable instead. @@ -2497,7 +2514,7 @@ Deprecated C APIs which return a :term:`borrowed reference`. (Soft deprecated as part of :pep:`667`.) -* Deprecate the :c:func:`PyImport_ImportModuleNoBlock` function, +* Deprecate the :c:func:`!PyImport_ImportModuleNoBlock` function, which is just an alias to :c:func:`PyImport_ImportModule` since Python 3.3. (Contributed by Victor Stinner in :gh:`105396`.) @@ -2529,6 +2546,8 @@ Deprecated C APIs .. include:: ../deprecations/c-api-pending-removal-in-3.15.rst +.. include:: ../deprecations/c-api-pending-removal-in-3.18.rst + .. include:: ../deprecations/c-api-pending-removal-in-future.rst .. _pythoncapi-compat project: https://github.com/python/pythoncapi-compat/ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 0f15a2a8a8f..88e52015bdc 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -2,7 +2,7 @@ What's new in Python 3.14 **************************** -:Editor: TBD +:Editor: Hugo van Kemenade .. 
Rules for maintenance: @@ -48,6 +48,10 @@ This article explains the new features in Python 3.14, compared to 3.13. For full details, see the :ref:`changelog `. +.. seealso:: + + :pep:`745` -- Python 3.14 release schedule + .. note:: Prerelease users should be aware that this document is currently in draft @@ -61,16 +65,39 @@ Summary -- release highlights .. This section singles out the most important changes in Python 3.14. Brevity is key. +Python 3.14 beta is the pre-release of the next version of the Python +programming language, with a mix of changes to the language, the +implementation and the standard library. + +The biggest changes to the implementation include template strings (:pep:`750`), +deferred evaluation of annotations (:pep:`649`), +and a new type of interpreter that uses tail calls. + +The library changes include the addition of a new :mod:`!annotationlib` module +for introspecting and wrapping annotations (:pep:`649`), +a new :mod:`!compression.zstd` module for Zstandard support (:pep:`784`), +plus syntax highlighting in the REPL, +as well as the usual deprecations and removals, +and improvements in user-friendliness and correctness. .. PEP-sized items next. 
-* :ref:`PEP 649: deferred evaluation of annotations ` +* :ref:`PEP 649 and 749: deferred evaluation of annotations ` * :ref:`PEP 741: Python Configuration C API ` +* :ref:`PEP 750: Template strings ` * :ref:`PEP 758: Allow except and except* expressions without parentheses ` * :ref:`PEP 761: Discontinuation of PGP signatures ` * :ref:`PEP 765: Disallow return/break/continue that exit a finally block ` +* :ref:`Free-threaded mode improvements ` * :ref:`PEP 768: Safe external debugger interface for CPython ` -* :ref:`A new type of interpreter ` +* :ref:`PEP 784: Adding Zstandard to the standard library ` +* :ref:`A new type of interpreter ` +* :ref:`Syntax highlighting in PyREPL `, + and color output in :ref:`unittest `, + :ref:`argparse `, + :ref:`json ` and + :ref:`calendar ` CLIs +* :ref:`Binary releases for the experimental just-in-time compiler ` Incompatible changes @@ -88,10 +115,86 @@ If you encounter :exc:`NameError`\s or pickling errors coming out of :mod:`multiprocessing` or :mod:`concurrent.futures`, see the :ref:`forkserver restrictions `. +The interpreter avoids some reference count modifications internally when +it's safe to do so. This can lead to different values returned from +:func:`sys.getrefcount` and :c:func:`Py_REFCNT` compared to previous versions +of Python. See :ref:`below ` for details. New features ============ +.. _whatsnew314-pep750: + +PEP 750: Template strings +------------------------- + +Template string literals (t-strings) are a generalization of f-strings, +using a ``t`` in place of the ``f`` prefix. Instead of evaluating +to :class:`str`, t-strings evaluate to a new :class:`!string.templatelib.Template` type: + +.. code-block:: python + + from string.templatelib import Template + + name = "World" + template: Template = t"Hello {name}" + +The template can then be combined with functions that operate on the template's +structure to produce a :class:`str` or a string-like result. +For example, sanitizing input: + +.. 
code-block:: python + + evil = "<script>alert('evil')</script>" + template = t"<p>{evil}</p>" + assert html(template) == "<p>&lt;script&gt;alert('evil')&lt;/script&gt;</p>" + +As another example, generating HTML attributes from data: + +.. code-block:: python + + attributes = {"src": "shrubbery.jpg", "alt": "looks nice"} + template = t"<img {attributes}>" + assert html(template) == '<img src="shrubbery.jpg" alt="looks nice" />' + +Compared to using an f-string, the ``html`` function has access to template attributes +containing the original information: static strings, interpolations, and values +from the original scope. Unlike existing templating approaches, t-strings build +from the well-known f-string syntax and rules. Template systems thus benefit +from Python tooling as they are much closer to the Python language, syntax, +scoping, and more. + +Writing template handlers is straightforward: + +.. code-block:: python + + from string.templatelib import Template, Interpolation + + def lower_upper(template: Template) -> str: + """Render static parts lowercased and interpolations uppercased.""" + parts: list[str] = [] + for item in template: + if isinstance(item, Interpolation): + parts.append(str(item.value).upper()) + else: + parts.append(item.lower()) + return "".join(parts) + + name = "world" + assert lower_upper(t"HELLO {name}") == "hello WORLD" + +With this in place, developers can write template systems to sanitize SQL, make +safe shell operations, improve logging, tackle modern ideas in web development +(HTML, CSS, and so on), and implement lightweight, custom business DSLs. + +(Contributed by Jim Baker, Guido van Rossum, Paul Everitt, Koudai Aono, +Lysandros Nikolaou, Dave Peck, Adam Turner, Jelle Zijlstra, Bénédikt Tran, +and Pablo Galindo Salgado in :gh:`132661`.) + +.. seealso:: + :pep:`750`. + + .. _whatsnew314-pep768: PEP 768: Safe external debugger interface for CPython @@ -99,7 +202,9 @@ PEP 768: Safe external debugger interface for CPython :pep:`768` introduces a zero-overhead debugging interface that allows debuggers and profilers to safely attach to running Python processes.
This is a significant enhancement to Python's -debugging capabilities allowing debuggers to forego unsafe alternatives. +debugging capabilities allowing debuggers to forego unsafe alternatives. See +:ref:`below <whatsnew314-remote-pdb>` for how this feature is leveraged to +implement the :mod:`pdb` module's new remote attaching capabilities. The new interface provides safe execution points for attaching debugger code without modifying the interpreter's normal execution path or adding runtime overhead. This enables tools to @@ -145,10 +250,86 @@ A key implementation detail is that the interface piggybacks on the interpreter' loop and safe points, ensuring zero overhead during normal execution while providing a reliable way for external processes to coordinate debugging operations. -See :pep:`768` for more details. - (Contributed by Pablo Galindo Salgado, Matt Wozniski, and Ivona Stojanovic in :gh:`131591`.) +.. seealso:: + :pep:`768`. + + +.. _whatsnew314-pep784: + +PEP 784: Adding Zstandard to the standard library +------------------------------------------------- + +The new ``compression`` package contains modules :mod:`!compression.lzma`, +:mod:`!compression.bz2`, :mod:`!compression.gzip` and :mod:`!compression.zlib` +which re-export the :mod:`lzma`, :mod:`bz2`, :mod:`gzip` and :mod:`zlib` +modules respectively. The new import names under ``compression`` are the +canonical names for importing these compression modules going forward. However, +the existing module names have not been deprecated. Any deprecation or removal +of the existing compression modules will occur no sooner than five years after +the release of 3.14. + +The new :mod:`!compression.zstd` module provides compression and decompression +APIs for the Zstandard format via bindings to `Meta's zstd library +<https://facebook.github.io/zstd/>`__. Zstandard is a widely adopted, highly +efficient, and fast compression format.
In addition to the APIs introduced in +:mod:`!compression.zstd`, support for reading and writing Zstandard compressed +archives has been added to the :mod:`tarfile`, :mod:`zipfile`, and +:mod:`shutil` modules. + +Here's an example of using the new module to compress some data: + +.. code-block:: python + + from compression import zstd + import math + + data = str(math.pi).encode() * 20 + + compressed = zstd.compress(data) + + ratio = len(compressed) / len(data) + print(f"Achieved compression ratio of {ratio}") + +As can be seen, the API is similar to the APIs of the :mod:`!lzma` and +:mod:`!bz2` modules. + +(Contributed by Emma Harper Smith, Adam Turner, Gregory P. Smith, Tomas Roun, +Victor Stinner, and Rogdham in :gh:`132983`.) + +.. seealso:: + :pep:`784`. + + +.. _whatsnew314-remote-pdb: + +Remote attaching to a running Python process with PDB +----------------------------------------------------- + +The :mod:`pdb` module now supports remote attaching to a running Python process +using a new ``-p PID`` command-line option: + +.. code-block:: sh + + python -m pdb -p 1234 + +This will connect to the Python process with the given PID and allow you to +debug it interactively. Notice that, due to how the Python interpreter works, +attaching to a remote process that is blocked in a system call or waiting for +I/O will only work once the next bytecode instruction is executed or when the +process receives a signal. + +This feature uses :pep:`768` and the :func:`sys.remote_exec` function +to attach to the remote process and send the PDB commands to it. + + +(Contributed by Matt Wozniski and Pablo Galindo in :gh:`131591`.) + +.. seealso:: + :pep:`768`. + + .. _whatsnew314-pep758: PEP 758 – Allow except and except* expressions without parentheses @@ -175,21 +356,25 @@ Check :pep:`758` for more details. (Contributed by Pablo Galindo and Brett Cannon in :gh:`131831`.) +.. seealso:: + :pep:`758`. + ..
_whatsnew314-pep649: -PEP 649: deferred evaluation of annotations -------------------------------------------- +PEP 649 and 749: deferred evaluation of annotations +--------------------------------------------------- The :term:`annotations ` on functions, classes, and modules are no longer evaluated eagerly. Instead, annotations are stored in special-purpose :term:`annotate functions ` and evaluated only when -necessary. This is specified in :pep:`649` and :pep:`749`. +necessary (except if ``from __future__ import annotations`` is used). +This is specified in :pep:`649` and :pep:`749`. This change is designed to make annotations in Python more performant and more usable in most circumstances. The runtime cost for defining annotations is minimized, but it remains possible to introspect annotations at runtime. -It is usually no longer necessary to enclose annotations in strings if they +It is no longer necessary to enclose annotations in strings if they contain forward references. The new :mod:`annotationlib` module provides tools for inspecting deferred @@ -225,7 +410,8 @@ writing annotations the same way you did with previous versions of Python. You will likely be able to remove quoted strings in annotations, which are frequently used for forward references. Similarly, if you use ``from __future__ import annotations`` to avoid having to write strings in annotations, you may well be able to -remove that import. However, if you rely on third-party libraries that read annotations, +remove that import once you support only Python 3.14 and newer. +However, if you rely on third-party libraries that read annotations, those libraries may need changes to support unquoted annotations before they work as expected. @@ -238,6 +424,11 @@ annotations. For example, you may want to use :func:`annotationlib.get_annotatio with the :attr:`~annotationlib.Format.FORWARDREF` format, as the :mod:`dataclasses` module now does. 
+The external :pypi:`typing_extensions` package provides partial backports of some of the +functionality of the :mod:`annotationlib` module, such as the :class:`~annotationlib.Format` +enum and the :func:`~annotationlib.get_annotations` function. These can be used to +write cross-version code that takes advantage of the new behavior in Python 3.14. + Related changes ^^^^^^^^^^^^^^^ @@ -249,6 +440,10 @@ functions in the standard library, there are many ways in which your code may not work in Python 3.14. To safeguard your code against future changes, use only the documented functionality of the :mod:`annotationlib` module. +In particular, do not read annotations directly from the namespace dictionary +attribute of type objects. Use :func:`annotationlib.get_annotate_from_class_namespace` +during class construction and :func:`annotationlib.get_annotations` afterwards. + ``from __future__ import annotations`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -260,15 +455,63 @@ Python without deferred evaluation of annotations, reaches its end of life in 20 In Python 3.14, the behavior of code using ``from __future__ import annotations`` is unchanged. +(Contributed by Jelle Zijlstra in :gh:`119180`; :pep:`649` was written by Larry Hastings.) + +.. seealso:: + :pep:`649` and :pep:`749`. + Improved error messages ----------------------- +* The interpreter now provides helpful suggestions when it detects typos in Python + keywords. When a word that closely resembles a Python keyword is encountered, + the interpreter will suggest the correct keyword in the error message. This + feature helps programmers quickly identify and fix common typing mistakes. For + example: + + .. code-block:: pycon + + >>> whille True: + ... pass + Traceback (most recent call last): + File "", line 1 + whille True: + ^^^^^^ + SyntaxError: invalid syntax. Did you mean 'while'? + + >>> asynch def fetch_data(): + ... 
pass + Traceback (most recent call last): + File "", line 1 + asynch def fetch_data(): + ^^^^^^ + SyntaxError: invalid syntax. Did you mean 'async'? + + >>> async def foo(): + ... awaid fetch_data() + Traceback (most recent call last): + File "", line 2 + awaid fetch_data() + ^^^^^ + SyntaxError: invalid syntax. Did you mean 'await'? + + >>> raisee ValueError("Error") + Traceback (most recent call last): + File "", line 1 + raisee ValueError("Error") + ^^^^^^ + SyntaxError: invalid syntax. Did you mean 'raise'? + + While the feature focuses on the most common cases, some variations of + misspellings may still result in regular syntax errors. + (Contributed by Pablo Galindo in :gh:`132449`.) + * When unpacking assignment fails due to incorrect number of variables, the error message prints the received number of values in more cases than before. (Contributed by Tushar Sadhwani in :gh:`122239`.) - .. code-block:: python + .. code-block:: pycon >>> x, y, z = 1, 2, 3, 4 Traceback (most recent call last): @@ -277,6 +520,21 @@ Improved error messages ^^^^^^^ ValueError: too many values to unpack (expected 3, got 4) +* :keyword:`elif` statements that follow an :keyword:`else` block now have a specific error message. + (Contributed by Steele Farnsworth in :gh:`129902`.) + + .. code-block:: pycon + + >>> if who == "me": + ... print("It's me!") + ... else: + ... print("It's not me!") + ... elif who is None: + ... print("Who is it?") + File "", line 5 + elif who is None: + ^^^^ + SyntaxError: 'elif' block follows an 'else' block * If a statement (:keyword:`pass`, :keyword:`del`, :keyword:`return`, :keyword:`yield`, :keyword:`raise`, :keyword:`break`, :keyword:`continue`, @@ -307,12 +565,64 @@ Improved error messages that the string may be intended to be part of the string. (Contributed by Pablo Galindo in :gh:`88535`.) - .. code-block:: python + .. 
code-block:: pycon >>> "The interesting object "The important object" is very important" Traceback (most recent call last): SyntaxError: invalid syntax. Is this intended to be part of the string? +* When strings have incompatible prefixes, the error now shows + which prefixes are incompatible. (Contributed by + Nikita Sobolev in :gh:`133197`.) + + .. code-block:: pycon + + >>> ub'abc' + File "", line 1 + ub'abc' + ^^ + SyntaxError: 'u' and 'b' prefixes are incompatible + +* Improved error messages when using ``as`` with incompatible targets in: + + - Imports: ``import ... as ...`` + - From imports: ``from ... import ... as ...`` + - Except handlers: ``except ... as ...`` + - Pattern-match cases: ``case ... as ...`` + + (Contributed by Nikita Sobolev in :gh:`123539`, + :gh:`123562`, and :gh:`123440`.) + + .. code-block:: pycon + + >>> import ast as arr[0] + File "", line 1 + import ast as arr[0] + ^^^^^^ + SyntaxError: cannot use subscript as import target + +* Improved error message when trying to add an instance of an unhashable type to + a :class:`dict` or :class:`set`. (Contributed by CF Bolz-Tereick and Victor Stinner + in :gh:`132828`.) + + .. code-block:: pycon + + >>> s = set() + >>> s.add({'pages': 12, 'grade': 'A'}) + Traceback (most recent call last): + File "", line 1, in + s.add({'pages': 12, 'grade': 'A'}) + ~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + TypeError: cannot use 'dict' as a set element (unhashable type: 'dict') + >>> d = {} + >>> l = [1, 2, 3] + >>> d[l] = 12 + Traceback (most recent call last): + File "", line 1, in + d[l] = 12 + ~^^^ + TypeError: cannot use 'list' as a dict key (unhashable type: 'list') + .. _whatsnew314-pep741: @@ -347,6 +657,105 @@ configuration mechanisms). .. seealso:: :pep:`741`. +.. _whatsnew314-asyncio-introspection: + +Asyncio introspection capabilities +---------------------------------- + +Added a new command-line interface to inspect running Python processes using +asynchronous tasks, available via: + +.. 
code-block:: bash + + python -m asyncio ps PID + +This tool inspects the given process ID (PID) and displays information about +currently running asyncio tasks. It outputs a task table: a flat +listing of all tasks, their names, their coroutine stacks, and which tasks are +awaiting them. + +.. code-block:: bash + + python -m asyncio pstree PID + +This tool fetches the same information, but renders a visual async call tree, +showing coroutine relationships in a hierarchical format. This command is +particularly useful for debugging long-running or stuck asynchronous programs. +It can help developers quickly identify where a program is blocked, what tasks +are pending, and how coroutines are chained together. + +For example, given this code: + +.. code-block:: python + + import asyncio + + async def play(track): + await asyncio.sleep(5) + print(f"🎵 Finished: {track}") + + async def album(name, tracks): + async with asyncio.TaskGroup() as tg: + for track in tracks: + tg.create_task(play(track), name=track) + + async def main(): + async with asyncio.TaskGroup() as tg: + tg.create_task( + album("Sundowning", ["TNDNBTG", "Levitate"]), name="Sundowning") + tg.create_task( + album("TMBTE", ["DYWTYLM", "Aqua Regia"]), name="TMBTE") + + if __name__ == "__main__": + asyncio.run(main()) + +Executing the new tool on the running process will yield a table like this: + +..
code-block:: bash + + python -m asyncio ps 12345 + + tid task id task name coroutine chain awaiter name awaiter id + --------------------------------------------------------------------------------------------------------------------------------------- + 8138752 0x564bd3d0210 Task-1 0x0 + 8138752 0x564bd3d0410 Sundowning _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 + 8138752 0x564bd3d0610 TMBTE _aexit -> __aexit__ -> main Task-1 0x564bd3d0210 + 8138752 0x564bd3d0810 TNDNBTG _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 + 8138752 0x564bd3d0a10 Levitate _aexit -> __aexit__ -> album Sundowning 0x564bd3d0410 + 8138752 0x564bd3e0550 DYWTYLM _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 + 8138752 0x564bd3e0710 Aqua Regia _aexit -> __aexit__ -> album TMBTE 0x564bd3d0610 + + +or: + +.. code-block:: bash + + python -m asyncio pstree 12345 + + └── (T) Task-1 + └── main + └── __aexit__ + └── _aexit + ├── (T) Sundowning + │ └── album + │ └── __aexit__ + │ └── _aexit + │ ├── (T) TNDNBTG + │ └── (T) Levitate + └── (T) TMBTE + └── album + └── __aexit__ + └── _aexit + ├── (T) DYWTYLM + └── (T) Aqua Regia + +If a cycle is detected in the async await graph (which could indicate a +programming issue), the tool raises an error and lists the cycle paths that +prevent tree construction. + +(Contributed by Pablo Galindo, Łukasz Langa, Yury Selivanov, and Marta +Gomez Macias in :gh:`91048`.) + .. _whatsnew314-tail-call: A new type of interpreter @@ -399,10 +808,94 @@ For further information on how to build Python, see (Contributed by Ken Jin in :gh:`128563`, with ideas on how to implement this in CPython by Mark Shannon, Garrett Gu, Haoran Xu, and Josh Haberman.) +.. _whatsnew314-free-threaded-cpython: + +Free-threaded mode +------------------ + +Free-threaded mode (:pep:`703`), initially added in 3.13, has been significantly improved. 
+The implementation described in PEP 703 was finished, including C API changes, +and temporary workarounds in the interpreter were replaced with more permanent solutions. +The specializing adaptive interpreter (:pep:`659`) is now enabled in free-threaded mode, +which along with many other optimizations greatly improves its performance. +The performance penalty on single-threaded code in free-threaded mode is now roughly 5-10%, +depending on platform and C compiler used. + +This work was done by many contributors: Sam Gross, Matt Page, Neil Schemenauer, +Thomas Wouters, Donghee Na, Kirill Podoprigora, Ken Jin, Itamar Oren, +Brett Simmers, Dino Viehland, Nathan Goldbaum, Ralf Gommers, Lysandros Nikolaou, +Kumar Aditya, Edgar Margffoy, and many others. + +Some of these contributors are employed by Meta, which has continued to provide +significant engineering resources to support this project. + +From 3.14, when compiling extension modules for the free-threaded build of +CPython on Windows, the preprocessor variable ``Py_GIL_DISABLED`` now needs to +be specified by the build backend, as it will no longer be determined +automatically by the C compiler. For a running interpreter, the setting that +was used at compile time can be found using :func:`sysconfig.get_config_var`. + + +.. _whatsnew314-pyrepl-highlighting: + +Syntax highlighting in PyREPL +----------------------------- + +The default :term:`interactive` shell now highlights Python syntax as you +type. The feature is enabled by default unless the +:envvar:`PYTHON_BASIC_REPL` environment variable is set or any color-disabling +environment variables are used. See :ref:`using-on-controlling-color` for +details. + +The default color theme for syntax highlighting strives for good contrast +and uses exclusively the 4-bit VGA standard ANSI color codes for maximum +compatibility. The theme can be customized using an experimental API +``_colorize.set_theme()``.
This can be called interactively, as well as +in the :envvar:`PYTHONSTARTUP` script. + +(Contributed by Łukasz Langa in :gh:`131507`.) + + +.. _whatsnew314-jit-compiler: + +Binary releases for the experimental just-in-time compiler +---------------------------------------------------------- + +The official macOS and Windows release binaries now include an *experimental* +just-in-time (JIT) compiler. Although it is **not** recommended for production +use, it can be tested by setting :envvar:`PYTHON_JIT=1 <PYTHON_JIT>` as an +environment variable. Downstream source builds and redistributors can use the +:option:`--enable-experimental-jit=yes-off` configuration option for similar +behavior. + +The JIT is at an early stage and still in active development. As such, the +typical performance impact of enabling it can range from 10% slower to 20% +faster, depending on workload. To aid in testing and evaluation, a set of +introspection functions has been provided in the :data:`sys._jit` namespace. +:func:`sys._jit.is_available` can be used to determine if the current executable +supports JIT compilation, while :func:`sys._jit.is_enabled` can be used to tell +if JIT compilation has been enabled for the current process. + +Currently, the most significant missing functionality is that native debuggers +and profilers like ``gdb`` and ``perf`` are unable to unwind through JIT frames +(Python debuggers and profilers, like :mod:`pdb` or :mod:`profile`, continue to +work without modification). Free-threaded builds do not support JIT compilation. + +Please report any bugs or major performance regressions that you encounter! + +.. seealso:: :pep:`744` + Other language changes ====================== +* The default :term:`interactive` shell now supports import autocompletion. + This means that typing ``import foo`` and pressing ``<tab>`` will suggest + modules starting with ``foo``. Similarly, typing ``from foo import b`` will + suggest submodules of ``foo`` starting with ``b``.
Note that autocompletion + of module attributes is not currently supported. + (Contributed by Tomas Roun in :gh:`69605`.) + * The :func:`map` built-in now has an optional keyword-only *strict* flag like :func:`zip` to check that all the iterables are of equal length. (Contributed by Wannes Boeykens in :gh:`119793`.) @@ -446,6 +939,11 @@ Other language changes ASCII :class:`bytes` and :term:`bytes-like objects `. (Contributed by Daniel Pope in :gh:`129349`.) +* Support ``\z`` as a synonym for ``\Z`` in :mod:`regular expressions `. + It is interpreted unambiguously in many other regular expression engines, + unlike ``\Z``, which has subtly different behavior. + (Contributed by Serhiy Storchaka in :gh:`133306`.) + * ``\B`` in :mod:`regular expression ` now matches empty input string. Now it is always the opposite of ``\b``. (Contributed by Serhiy Storchaka in :gh:`124130`.) @@ -458,17 +956,50 @@ Other language changes The testbed can also be used to run the test suite of projects other than CPython itself. (Contributed by Russell Keith-Magee in :gh:`127592`.) +* Three-argument :func:`pow` now tries calling :meth:`~object.__rpow__` if + necessary. Previously it was only called in two-argument :func:`!pow` and the + binary power operator. + (Contributed by Serhiy Storchaka in :gh:`130104`.) + * Add a built-in implementation for HMAC (:rfc:`2104`) using formally verified code from the `HACL* `__ project. This implementation is used as a fallback when the OpenSSL implementation of HMAC is not available. (Contributed by Bénédikt Tran in :gh:`99108`.) +* The import time flag can now track modules that are already loaded ('cached'), + via the new :option:`-X importtime=2 <-X>`. + When such a module is imported, the ``self`` and ``cumulative`` times + are replaced by the string ``cached``. + Values above ``2`` for ``-X importtime`` are now reserved for future use. + (Contributed by Noah Kim and Adam Turner in :gh:`118655`.) 
+ +* When subclassing from a pure C type, the C slots for the new type are no + longer replaced with a wrapped version on class creation if they are not + explicitly overridden in the subclass. + (Contributed by Tomasz Pytel in :gh:`132329`.) + +* The command-line option :option:`-c` now automatically dedents its code + argument before execution. The auto-dedentation behavior mirrors + :func:`textwrap.dedent`. + (Contributed by Jon Crall and Steven Sun in :gh:`103998`.) + +* Improve error message when an object supporting the synchronous + context manager protocol is entered using :keyword:`async + with` instead of :keyword:`with`. + And vice versa with the asynchronous context manager protocol. + (Contributed by Bénédikt Tran in :gh:`128398`.) + +* :option:`!-J` is no longer a reserved flag for Jython_, + and now has no special meaning. + (Contributed by Adam Turner in :gh:`133336`.) + + .. _Jython: https://www.jython.org/ .. _whatsnew314-pep765: -PEP 765: Disallow return/break/continue that exit a finally block ------------------------------------------------------------------ +PEP 765: Disallow ``return``/``break``/``continue`` that exit a ``finally`` block +--------------------------------------------------------------------------------- The compiler emits a :exc:`SyntaxWarning` when a :keyword:`return`, :keyword:`break` or :keyword:`continue` statements appears where it exits a :keyword:`finally` block. @@ -499,6 +1030,16 @@ argparse and subparser names if mistyped by the user. (Contributed by Savannah Ostrowski in :gh:`124456`.) + .. _whatsnew314-color-argparse: + +* Introduced the optional *color* parameter to + :class:`argparse.ArgumentParser`, enabling color for help text. + This can be controlled by :ref:`environment variables + `. Color has also been enabled for help in the + :ref:`stdlib CLIs ` which use :mod:`!argparse`. + (Contributed by Hugo van Kemenade in :gh:`130645`.) 
+ + ast --- @@ -512,27 +1053,51 @@ ast (Contributed by Irit Katriel in :gh:`123958`.) * The ``repr()`` output for AST nodes now includes more information. - (Contributed by Tomas R in :gh:`116022`.) + (Contributed by Tomas Roun in :gh:`116022`.) * :func:`ast.parse`, when called with an AST as input, now always verifies that the root node type is appropriate. (Contributed by Irit Katriel in :gh:`130139`.) +* Add new ``--feature-version``, ``--optimize``, ``--show-empty`` options to + command-line interface. + (Contributed by Semyon Moroz in :gh:`133367`.) + + +asyncio +------- + +* The function and methods named :func:`!create_task` now take an arbitrary + list of keyword arguments. All keyword arguments are passed to the + :class:`~asyncio.Task` constructor or the custom task factory. + (See :meth:`~asyncio.loop.set_task_factory` for details.) + The ``name`` and ``context`` keyword arguments are no longer special; + the name should now be set using the ``name`` keyword argument of the factory, + and ``context`` may be ``None``. + + This affects the following function and methods: + :meth:`asyncio.create_task`, + :meth:`asyncio.loop.create_task`, + :meth:`asyncio.TaskGroup.create_task`. + (Contributed by Thomas Grainger in :gh:`128307`.) + + bdb --- * The :mod:`bdb` module now supports the :mod:`sys.monitoring` backend. (Contributed by Tian Gao in :gh:`124533`.) + + .. _whatsnew314-color-calendar: + calendar -------- * By default, today's date is highlighted in color in :mod:`calendar`'s :ref:`command-line ` text output. - This can be controlled via the :envvar:`PYTHON_COLORS` environment - variable as well as the canonical |NO_COLOR|_ - and |FORCE_COLOR|_ environment variables. - See also :ref:`using-on-controlling-color`. + This can be controlled by :ref:`environment variables + `. (Contributed by Hugo van Kemenade in :gh:`128317`.) 
@@ -540,7 +1105,7 @@ concurrent.futures ------------------ * Add :class:`~concurrent.futures.InterpreterPoolExecutor`, - which exposes "subinterpreters (multiple Python interpreters in the + which exposes "subinterpreters" (multiple Python interpreters in the same process) to Python code. This is separate from the proposed API in :pep:`734`. (Contributed by Eric Snow in :gh:`124548`.) @@ -619,16 +1184,32 @@ ctypes :class:`~ctypes.c_double_complex` and :class:`~ctypes.c_longdouble_complex`, are now available if both the compiler and the ``libffi`` library support complex C types. - (Contributed by Sergey B Kirpichev in :gh:`61103`). + (Contributed by Sergey B Kirpichev in :gh:`61103`.) * Add :func:`ctypes.util.dllist` for listing the shared libraries loaded by the current process. (Contributed by Brian Ward in :gh:`119349`.) +* Move :func:`ctypes.POINTER` types cache from a global internal cache + (``_pointer_type_cache``) to the :attr:`ctypes._CData.__pointer_type__` + attribute of the corresponding :mod:`ctypes` types. + This will stop the cache from growing without limits in some situations. + (Contributed by Sergey Miryanov in :gh:`100926`.) + * The :class:`ctypes.py_object` type now supports subscription, making it a :term:`generic type`. (Contributed by Brian Schubert in :gh:`132168`.) +* :mod:`ctypes` now supports :term:`free-threading builds `. + (Contributed by Kumar Aditya and Peter Bierma in :gh:`127945`.) + +curses +------ + +* Add the :func:`~curses.assume_default_colors` function, + a refinement of the :func:`~curses.use_default_colors` function which + allows to change the color pair ``0``. + (Contributed by Serhiy Storchaka in :gh:`133139`.) datetime -------- @@ -643,6 +1224,10 @@ decimal :meth:`Decimal.from_number() `. (Contributed by Serhiy Storchaka in :gh:`121798`.) +* Expose :func:`decimal.IEEEContext` to support creation of contexts + corresponding to the IEEE 754 (2008) decimal interchange formats. 
+ (Contributed by Sergey B Kirpichev in :gh:`53032`.) + difflib ------- @@ -678,6 +1263,15 @@ errno (Contributed by James Roy in :gh:`126585`.) +faulthandler +------------ + +* Add support for printing the C stack trace on systems that + :ref:`support it ` via :func:`faulthandler.dump_c_stack` + or via the *c_stack* argument in :func:`faulthandler.enable`. + (Contributed by Peter Bierma in :gh:`127604`.) + + fnmatch ------- @@ -720,12 +1314,33 @@ getopt (Contributed by Serhiy Storchaka in :gh:`126390`.) +getpass +------- + +* Support keyboard feedback by :func:`getpass.getpass` via the keyword-only + optional argument ``echo_char``. Placeholder characters are rendered whenever + a character is entered, and removed when a character is deleted. + (Contributed by Semyon Moroz in :gh:`77065`.) + + graphlib -------- * Allow :meth:`graphlib.TopologicalSorter.prepare` to be called more than once as long as sorting has not started. - (Contributed by Daniel Pope in :gh:`130914`) + (Contributed by Daniel Pope in :gh:`130914`.) + + +heapq +----- + +* Add functions for working with max-heaps: + + * :func:`heapq.heapify_max`, + * :func:`heapq.heappush_max`, + * :func:`heapq.heappop_max`, + * :func:`heapq.heapreplace_max` + * :func:`heapq.heappushpop_max` hmac @@ -804,6 +1419,13 @@ json See the :ref:`JSON command-line interface ` documentation. (Contributed by Trey Hunner in :gh:`122873`.) + .. _whatsnew314-color-json: + +* By default, the output of the :ref:`JSON command-line interface + ` is highlighted in color. + This can be controlled by :ref:`environment variables + `. + (Contributed by Tomas Roun in :gh:`131952`.) linecache --------- @@ -812,6 +1434,25 @@ linecache (Contributed by Tian Gao in :gh:`131638`.) +logging.handlers +---------------- + +* :class:`logging.handlers.QueueListener` now implements the context + manager protocol, allowing it to be used in a :keyword:`with` statement. + (Contributed by Charles Machalow in :gh:`132106`.) 
+ +* :meth:`QueueListener.start ` now + raises a :exc:`RuntimeError` if the listener is already started. + (Contributed by Charles Machalow in :gh:`132106`.) + + +math +---- + +* Added more detailed error messages for domain errors in the module. + (Contributed by Charlie Zhao and Sergey B Kirpichev in :gh:`101410`.) + + mimetypes --------- @@ -856,9 +1497,22 @@ mimetypes * :rfc:`2361`: Change type for ``.avi`` to ``video/vnd.avi`` and for ``.wav`` to ``audio/vnd.wave`` - * :rfc:`4337`: Add MPEG-4 ``audio/mp4`` (``.m4a``)) + * :rfc:`4337`: Add MPEG-4 ``audio/mp4`` (``.m4a``) * :rfc:`5334`: Add Ogg media (``.oga``, ``.ogg`` and ``.ogx``) + * :rfc:`6713`: Add gzip ``application/gzip`` (``.gz``) * :rfc:`9639`: Add FLAC ``audio/flac`` (``.flac``) + * Add 7z ``application/x-7z-compressed`` (``.7z``) + * Add Android Package ``application/vnd.android.package-archive`` (``.apk``) + when not strict + * Add deb ``application/x-debian-package`` (``.deb``) + * Add glTF binary ``model/gltf-binary`` (``.glb``) + * Add glTF JSON/ASCII ``model/gltf+json`` (``.gltf``) + * Add M4V ``video/x-m4v`` (``.m4v``) + * Add PHP ``application/x-httpd-php`` (``.php``) + * Add RAR ``application/vnd.rar`` (``.rar``) + * Add RPM ``application/x-rpm`` (``.rpm``) + * Add STL ``model/stl`` (``.stl``) + * Add Windows Media Video ``video/x-ms-wmv`` (``.wmv``) * De facto: Add WebM ``audio/webm`` (``.weba``) * `ECMA-376 `__: @@ -871,6 +1525,10 @@ mimetypes (Contributed by Hugo van Kemenade in :gh:`129965`.) +* Add :rfc:`9512` ``application/yaml`` MIME type for YAML files (``.yaml`` + and ``.yml``). (Contributed by Sasha "Nelie" Chernykh and Hugo van Kemenade + in :gh:`132056`.) + multiprocessing --------------- @@ -902,9 +1560,9 @@ multiprocessing * The :ref:`multiprocessing proxy objects ` for *list* and *dict* types gain previously overlooked missing methods: - * :meth:`!clear` and :meth:`!copy` for proxies of :class:`list`.
+ * :meth:`!clear` and :meth:`!copy` for proxies of :class:`list` * :meth:`~dict.fromkeys`, ``reversed(d)``, ``d | {}``, ``{} | d``, - ``d |= {'b': 2}`` for proxies of :class:`dict`. + ``d |= {'b': 2}`` for proxies of :class:`dict` (Contributed by Roy Hyunjin Han for :gh:`103134`.) @@ -913,6 +1571,10 @@ multiprocessing The :func:`set` in :func:`multiprocessing.Manager` method is now available. (Contributed by Mingyu Park in :gh:`129949`.) +* Add :func:`multiprocessing.Process.interrupt` which terminates the child + process by sending :py:const:`~signal.SIGINT`. This enables + :keyword:`finally` clauses to print a stack trace for the terminated + process. (Contributed by Artem Pulkin in :gh:`131913`.) operator -------- @@ -992,6 +1654,11 @@ pdb fill in a 4-space indentation now, instead of inserting a ``\t`` character. (Contributed by Tian Gao in :gh:`130471`.) +* Auto-indent is introduced in :mod:`pdb` multi-line input. It will either + keep the indentation of the last line or insert a 4-space indentation when + it detects a new code block. + (Contributed by Tian Gao in :gh:`133350`.) + * ``$_asynctask`` is added to access the current asyncio task if applicable. (Contributed by Tian Gao in :gh:`124367`.) @@ -1002,6 +1669,16 @@ pdb backend by default, which is configurable. (Contributed by Tian Gao in :gh:`124533`.) +* :func:`pdb.set_trace_async` is added to support debugging asyncio + coroutines. :keyword:`await` statements are supported with this + function. + (Contributed by Tian Gao in :gh:`132576`.) + +* Source code displayed in :mod:`pdb` will be syntax-highlighted. This feature + can be controlled using the same methods as PyREPL, in addition to the newly + added ``colorize`` argument of :class:`pdb.Pdb`. + (Contributed by Tian Gao and Łukasz Langa in :gh:`133355`.) + pickle ------ @@ -1029,6 +1706,32 @@ pydoc (Contributed by Jelle Zijlstra in :gh:`101552`.) +socket +------ + +* Improve and fix support for Bluetooth sockets. 
+ + * Fix support of Bluetooth sockets on NetBSD and DragonFly BSD. + (Contributed by Serhiy Storchaka in :gh:`132429`.) + * Fix support for :const:`~socket.BTPROTO_HCI` on FreeBSD. + (Contributed by Victor Stinner in :gh:`111178`.) + * Add support for :const:`~socket.BTPROTO_SCO` on FreeBSD. + (Contributed by Serhiy Storchaka in :gh:`85302`.) + * Add support for *cid* and *bdaddr_type* in the address for + :const:`~socket.BTPROTO_L2CAP` on FreeBSD. + (Contributed by Serhiy Storchaka in :gh:`132429`.) + * Add support for *channel* in the address for + :const:`~socket.BTPROTO_HCI` on Linux. + (Contributed by Serhiy Storchaka in :gh:`70145`.) + * Accept an integer as the address for + :const:`~socket.BTPROTO_HCI` on Linux. + (Contributed by Serhiy Storchaka in :gh:`132099`.) + * Return *cid* in :meth:`~socket.socket.getsockname` for + :const:`~socket.BTPROTO_L2CAP`. + (Contributed by Serhiy Storchaka in :gh:`132429`.) + * Add many new constants. + (Contributed by Serhiy Storchaka in :gh:`132734`.) + ssl --- @@ -1041,8 +1744,8 @@ struct ------ * Support the :c:expr:`float complex` and :c:expr:`double complex` C types in - the :mod:`struct` module (formatting characters ``'E'`` and ``'C'``, - respectively) if the compiler has C11 complex arithmetic. + the :mod:`struct` module (formatting characters ``'F'`` and ``'D'`` + respectively). (Contributed by Sergey B Kirpichev in :gh:`121249`.) @@ -1071,6 +1774,9 @@ sys * On FreeBSD, :data:`sys.platform` doesn't contain the major version anymore. It is always ``'freebsd'``, instead of ``'freebsd13'`` or ``'freebsd14'``. +* Raise :exc:`DeprecationWarning` for :func:`sys._clear_type_cache`. This + function was deprecated in Python 3.13 but it didn't raise a runtime warning. + sys.monitoring -------------- @@ -1078,6 +1784,14 @@ sys.monitoring * Two new events are added: :monitoring-event:`BRANCH_LEFT` and :monitoring-event:`BRANCH_RIGHT`. The ``BRANCH`` event is deprecated. 
+ +sysconfig +--------- + +* Add ``ABIFLAGS`` key to :func:`sysconfig.get_config_vars` on Windows. + (Contributed by Xuehai Pan in :gh:`131799`.) + + threading --------- @@ -1085,13 +1799,17 @@ threading to :attr:`threading.Thread.name`. (Contributed by Victor Stinner in :gh:`59705`.) + tkinter ------- -* Make tkinter widget methods :meth:`!after` and :meth:`!after_idle` accept - arguments passed by keyword. +* Make :mod:`tkinter` widget methods :meth:`!after` and :meth:`!after_idle` + accept arguments passed by keyword. (Contributed by Zhikang Yan in :gh:`126899`.) +* Add ability to specify name for :class:`!tkinter.OptionMenu` and + :class:`!tkinter.ttk.OptionMenu`. + (Contributed by Zhikang Yan in :gh:`130482`.) turtle ------ @@ -1158,14 +1876,14 @@ unicodedata * The Unicode database has been updated to Unicode 16.0.0. +.. _whatsnew314-color-unittest: + unittest -------- * :mod:`unittest` output is now colored by default. - This can be controlled via the :envvar:`PYTHON_COLORS` environment - variable as well as the canonical |NO_COLOR|_ - and |FORCE_COLOR|_ environment variables. - See also :ref:`using-on-controlling-color`. + This can be controlled by :ref:`environment variables + `. (Contributed by Hugo van Kemenade in :gh:`127221`.) * unittest discovery supports :term:`namespace package` as start @@ -1197,6 +1915,31 @@ urllib supporting SHA-256 digest authentication as specified in :rfc:`7616`. (Contributed by Calvin Bui in :gh:`128193`.) +* Improve ergonomics and standards compliance when parsing and emitting + ``file:`` URLs. + + In :func:`urllib.request.url2pathname`: + + - Accept a complete URL when the new *require_scheme* argument is set to + true. + - Discard URL authority if it matches the local hostname. + - Discard URL authority if it resolves to a local IP address when the new + *resolve_host* argument is set to true. + - Raise :exc:`~urllib.error.URLError` if a URL authority isn't local, + except on Windows where we return a UNC path as before. 
+ + In :func:`urllib.request.pathname2url`: + + - Return a complete URL when the new *add_scheme* argument is set to true. + - Include an empty URL authority when a path begins with a slash. For + example, the path ``/etc/hosts`` is converted to the URL ``///etc/hosts``. + + On Windows, drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an :exc:`OSError` + exception to be raised. + + (Contributed by Barney Gale in :gh:`125866`.) + uuid ---- @@ -1214,6 +1957,17 @@ uuid (Contributed by Simon Legner in :gh:`131236`.) +webbrowser +---------- + +* Names in the :envvar:`BROWSER` environment variable can now refer to already + registered browsers for the :mod:`webbrowser` module, instead of always + generating a new browser command. + + This makes it possible to set :envvar:`BROWSER` to the value of one of the + supported browsers on macOS. + + zipinfo ------- @@ -1246,11 +2000,19 @@ Optimizations asyncio ------- -* :mod:`asyncio` now uses double linked list implementation for native tasks - which speeds up execution by 10% on standard pyperformance benchmarks and - reduces memory usage. +* :mod:`asyncio` has a new per-thread double linked list implementation internally for + :class:`native tasks ` which speeds up execution by 10-20% on standard + pyperformance benchmarks and reduces memory usage. + This enables external introspection tools such as + :ref:`python -m asyncio pstree ` + to introspect the call graph of asyncio tasks running in all threads. (Contributed by Kumar Aditya in :gh:`107803`.) +* :mod:`asyncio` has first class support for :term:`free-threading builds `. + This enables parallel execution of multiple event loops across different threads and scales + linearly with the number of threads. + (Contributed by Kumar Aditya in :gh:`128002`.) 
+ * :mod:`asyncio` has new utility functions for introspecting and printing the program's call graph: :func:`asyncio.capture_call_graph` and :func:`asyncio.print_call_graph`. @@ -1332,7 +2094,6 @@ Deprecated * :class:`asyncio.WindowsProactorEventLoopPolicy` * :func:`asyncio.get_event_loop_policy` * :func:`asyncio.set_event_loop_policy` - * :func:`asyncio.set_event_loop` Users should use :func:`asyncio.run` or :class:`asyncio.Runner` with *loop_factory* to use the desired event loop implementation. @@ -1354,11 +2115,40 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`codecs`: + :func:`codecs.open` is now deprecated. Use :func:`open` instead. + (Contributed by Inada Naoki in :gh:`133036`.) + +* :mod:`ctypes`: + + * On non-Windows platforms, setting :attr:`.Structure._pack_` to use a + MSVC-compatible default memory layout is deprecated in favor of setting + :attr:`.Structure._layout_` to ``'ms'``. + (Contributed by Petr Viktorin in :gh:`131747`.) + + * Calling :func:`ctypes.POINTER` on a string is deprecated. + Use :ref:`ctypes-incomplete-types` for self-referential structures. + Also, the internal ``ctypes._pointer_type_cache`` is deprecated. + See :func:`ctypes.POINTER` for updated implementation details. + (Contributed by Sergey Miryanov in :gh:`100926`.) + * :mod:`functools`: Calling the Python implementation of :func:`functools.reduce` with *function* or *sequence* as keyword arguments is now deprecated. (Contributed by Kirill Podoprigora in :gh:`121676`.) +* :mod:`logging`: + Support for custom logging handlers with the *strm* argument is deprecated + and scheduled for removal in Python 3.16. Define handlers with the *stream* + argument instead. (Contributed by Mariusz Felisiak in :gh:`115032`.) + +* :mod:`mimetypes`: + Valid extensions start with a '.' or are empty for + :meth:`mimetypes.MimeTypes.add_type`. + Undotted extensions are deprecated and will + raise a :exc:`ValueError` in Python 3.16.
+ (Contributed by Hugo van Kemenade in :gh:`75223`.) + * :mod:`!nturl2path`: This module is now deprecated. Call :func:`urllib.request.url2pathname` and :func:`~urllib.request.pathname2url` instead. @@ -1387,6 +2177,13 @@ Deprecated Deprecate :meth:`symtable.Class.get_methods` due to the lack of interest. (Contributed by Bénédikt Tran in :gh:`119698`.) +* :mod:`tkinter`: + The :class:`!tkinter.Variable` methods :meth:`!trace_variable`, + :meth:`!trace_vdelete` and :meth:`!trace_vinfo` are now deprecated. + Use :meth:`!trace_add`, :meth:`!trace_remove` and :meth:`!trace_info` + instead. + (Contributed by Serhiy Storchaka in :gh:`120220`.) + * :mod:`urllib.parse`: Accepting objects with false values (like ``0`` and ``[]``) except empty strings, byte-like objects and ``None`` in :mod:`urllib.parse` functions @@ -1400,6 +2197,10 @@ Deprecated .. include:: ../deprecations/pending-removal-in-3.16.rst +.. include:: ../deprecations/pending-removal-in-3.17.rst + +.. include:: ../deprecations/pending-removal-in-3.19.rst + .. include:: ../deprecations/pending-removal-in-future.rst Removed @@ -1495,7 +2296,7 @@ asyncio asyncio.run(main()) - If you need to start something, e.g. a server listening on a socket + If you need to start something, for example, a server listening on a socket and then run forever, use :func:`asyncio.run` and an :class:`asyncio.Event`. @@ -1671,7 +2472,7 @@ Others :meth:`~object.__index__`. (Contributed by Mark Dickinson in :gh:`119743`.) -CPython Bytecode Changes +CPython bytecode changes ======================== * Replaced the opcode ``BINARY_SUBSCR`` by :opcode:`BINARY_OP` with oparg ``NB_SUBSCR``. @@ -1812,12 +2613,12 @@ New features * Add a new import and export API for Python :class:`int` objects (:pep:`757`): - * :c:func:`PyLong_GetNativeLayout`; - * :c:func:`PyLong_Export`; - * :c:func:`PyLong_FreeExport`; - * :c:func:`PyLongWriter_Create`; - * :c:func:`PyLongWriter_Finish`; - * :c:func:`PyLongWriter_Discard`. 
+ * :c:func:`PyLong_GetNativeLayout` + * :c:func:`PyLong_Export` + * :c:func:`PyLong_FreeExport` + * :c:func:`PyLongWriter_Create` + * :c:func:`PyLongWriter_Finish` + * :c:func:`PyLongWriter_Discard` (Contributed by Sergey B Kirpichev and Victor Stinner in :gh:`102471`.) @@ -1852,6 +2653,11 @@ New features Adding ``?`` after any format unit makes ``None`` be accepted as a value. (Contributed by Serhiy Storchaka in :gh:`112068`.) +* The ``k`` and ``K`` formats in :c:func:`PyArg_ParseTuple` and + similar functions now use :meth:`~object.__index__` if available, + like all other integer formats. + (Contributed by Serhiy Storchaka in :gh:`112068`.) + * Add macros :c:func:`Py_PACK_VERSION` and :c:func:`Py_PACK_FULL_VERSION` for bit-packing Python version numbers. (Contributed by Petr Viktorin in :gh:`128629`.) @@ -1868,6 +2674,15 @@ New features take a C integer and produce a Python :class:`bool` object. (Contributed by Pablo Galindo in :issue:`45325`.) +* Add :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary` to determine if an object + is a unique temporary object on the interpreter's operand stack. This can + be used in some cases as a replacement for checking if :c:func:`Py_REFCNT` + is ``1`` for Python objects passed as arguments to C API functions. + +* Add :c:func:`PyUnstable_Object_IsUniquelyReferenced` as a replacement for + ``Py_REFCNT(op) == 1`` on :term:`free threaded ` builds. + (Contributed by Peter Bierma in :gh:`133140`.) + Limited C API changes --------------------- @@ -1902,26 +2717,37 @@ Porting to Python 3.14 a :exc:`UnicodeError` object. (Contributed by Bénédikt Tran in :gh:`127691`.) +.. _whatsnew314-refcount: + +* The interpreter internally avoids some reference count modifications when + loading objects onto the operands stack by :term:`borrowing ` + references when possible. This can lead to smaller reference count values + compared to previous Python versions. 
C API extensions that checked + :c:func:`Py_REFCNT` of ``1`` to determine if a function argument is not + referenced by any other code should instead use + :c:func:`PyUnstable_Object_IsUniqueReferencedTemporary` as a safer replacement. + + * Private functions promoted to public C APIs: - * ``_PyBytes_Join()``: :c:func:`PyBytes_Join`. - * ``_PyLong_IsNegative()``: :c:func:`PyLong_IsNegative`. - * ``_PyLong_IsPositive()``: :c:func:`PyLong_IsPositive`. - * ``_PyLong_IsZero()``: :c:func:`PyLong_IsZero`. - * ``_PyLong_Sign()``: :c:func:`PyLong_GetSign`. - * ``_PyUnicodeWriter_Dealloc()``: :c:func:`PyUnicodeWriter_Discard`. - * ``_PyUnicodeWriter_Finish()``: :c:func:`PyUnicodeWriter_Finish`. - * ``_PyUnicodeWriter_Init()``: use :c:func:`PyUnicodeWriter_Create`. - * ``_PyUnicodeWriter_Prepare()``: (no replacement). - * ``_PyUnicodeWriter_PrepareKind()``: (no replacement). - * ``_PyUnicodeWriter_WriteChar()``: :c:func:`PyUnicodeWriter_WriteChar`. - * ``_PyUnicodeWriter_WriteStr()``: :c:func:`PyUnicodeWriter_WriteStr`. - * ``_PyUnicodeWriter_WriteSubstring()``: :c:func:`PyUnicodeWriter_WriteSubstring`. - * ``_PyUnicode_EQ()``: :c:func:`PyUnicode_Equal`. - * ``_PyUnicode_Equal()``: :c:func:`PyUnicode_Equal`. - * ``_Py_GetConfig()``: :c:func:`PyConfig_Get` and :c:func:`PyConfig_GetInt`. - * ``_Py_HashBytes()``: :c:func:`Py_HashBuffer`. - * ``_Py_fopen_obj()``: :c:func:`Py_fopen`.
+ * ``_PyBytes_Join()``: :c:func:`PyBytes_Join` + * ``_PyLong_IsNegative()``: :c:func:`PyLong_IsNegative` + * ``_PyLong_IsPositive()``: :c:func:`PyLong_IsPositive` + * ``_PyLong_IsZero()``: :c:func:`PyLong_IsZero` + * ``_PyLong_Sign()``: :c:func:`PyLong_GetSign` + * ``_PyUnicodeWriter_Dealloc()``: :c:func:`PyUnicodeWriter_Discard` + * ``_PyUnicodeWriter_Finish()``: :c:func:`PyUnicodeWriter_Finish` + * ``_PyUnicodeWriter_Init()``: use :c:func:`PyUnicodeWriter_Create` + * ``_PyUnicodeWriter_Prepare()``: (no replacement) + * ``_PyUnicodeWriter_PrepareKind()``: (no replacement) + * ``_PyUnicodeWriter_WriteChar()``: :c:func:`PyUnicodeWriter_WriteChar` + * ``_PyUnicodeWriter_WriteStr()``: :c:func:`PyUnicodeWriter_WriteStr` + * ``_PyUnicodeWriter_WriteSubstring()``: :c:func:`PyUnicodeWriter_WriteSubstring` + * ``_PyUnicode_EQ()``: :c:func:`PyUnicode_Equal` + * ``_PyUnicode_Equal()``: :c:func:`PyUnicode_Equal` + * ``_Py_GetConfig()``: :c:func:`PyConfig_Get` and :c:func:`PyConfig_GetInt` + * ``_Py_HashBytes()``: :c:func:`Py_HashBuffer` + * ``_Py_fopen_obj()``: :c:func:`Py_fopen` The `pythoncapi-compat project`_ can be used to get most of these new functions on Python 3.13 and older. @@ -2025,3 +2851,10 @@ Removed * Remove the private ``_Py_InitializeMain()`` function. It was a :term:`provisional API` added to Python 3.8 by :pep:`587`. (Contributed by Victor Stinner in :gh:`129033`.) + +* The undocumented APIs :c:macro:`!Py_C_RECURSION_LIMIT` and + :c:member:`!PyThreadState.c_recursion_remaining`, added in 3.13, are removed + without a deprecation period. + Please use :c:func:`Py_EnterRecursiveCall` to guard against runaway recursion + in C code. + (Removed in :gh:`133079`, see also :gh:`130396`.) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst new file mode 100644 index 00000000000..bf186c191b0 --- /dev/null +++ b/Doc/whatsnew/3.15.rst @@ -0,0 +1,279 @@ + +**************************** + What's new in Python 3.15 +**************************** + +:Editor: TBD + +.. 
Rules for maintenance: + + * Anyone can add text to this document. Do not spend very much time + on the wording of your changes, because your text will probably + get rewritten to some degree. + + * The maintainer will go through Misc/NEWS periodically and add + changes; it's therefore more important to add your changes to + Misc/NEWS than to this file. + + * This is not a complete list of every single change; completeness + is the purpose of Misc/NEWS. Some changes I consider too small + or esoteric to include. If such a change is added to the text, + I'll just remove it. (This is another reason you shouldn't spend + too much time on writing your addition.) + + * If you want to draw your new text to the attention of the + maintainer, add 'XXX' to the beginning of the paragraph or + section. + + * It's OK to just add a fragmentary note about a change. For + example: "XXX Describe the transmogrify() function added to the + socket module." The maintainer will research the change and + write the necessary text. + + * You can comment out your additions if you like, but it's not + necessary (especially when a final release is some months away). + + * Credit the author of a patch or bugfix. Just the name is + sufficient; the e-mail address isn't necessary. + + * It's helpful to add the issue number as a comment: + + XXX Describe the transmogrify() function added to the socket + module. + (Contributed by P.Y. Developer in :gh:`12345`.) + + This saves the maintainer the effort of going through the VCS log + when researching a change. + +This article explains the new features in Python 3.15, compared to 3.14. + +For full details, see the :ref:`changelog `. + +.. note:: + + Prerelease users should be aware that this document is currently in draft + form. It will be updated substantially as Python 3.15 moves towards release, + so it's worth checking back even after reading earlier versions. + + +Summary --- release highlights +============================== + +.. 
This section singles out the most important changes in Python 3.15. + Brevity is key. + + +.. PEP-sized items next. + + + +New features +============ + + + +Other language changes +====================== + +* Several error messages incorrectly using the term "argument" have been corrected. + (Contributed by Stan Ulbrych in :gh:`133382`.) + + + +New modules +=========== + +* None yet. + + +Improved modules +================ + +ssl +--- + +* Indicate through :data:`ssl.HAS_PSK_TLS13` whether the :mod:`ssl` module + supports "External PSKs" in TLSv1.3, as described in RFC 9258. + (Contributed by Will Childs-Klein in :gh:`133624`.) + + +.. Add improved modules above alphabetically, not here at the end. + +Optimizations +============= + +module_name +----------- + +* TODO + + + +Deprecated +========== + +* module_name: + TODO + + +.. Add deprecations above alphabetically, not here at the end. + +Removed +======= + +http.server +----------- + +* Removed the :class:`!CGIHTTPRequestHandler` class + and the ``--cgi`` flag from the :program:`python -m http.server` + command-line interface. They were deprecated in Python 3.13. + (Contributed by Bénédikt Tran in :gh:`133810`.) + + +platform +-------- + +* Removed the :func:`!platform.java_ver` function, + which was deprecated since Python 3.13. + (Contributed by Alexey Makridenko in :gh:`133604`.) + + +sysconfig +--------- + +* Removed the *check_home* parameter of :func:`sysconfig.is_python_build`. + (Contributed by Filipe Laíns in :gh:`92897`.) + + +threading +--------- + +* Remove support for arbitrary positional or keyword arguments in the C + implementation of :class:`~threading.RLock` objects. This was deprecated + in Python 3.14. + (Contributed by Bénédikt Tran in :gh:`134087`.) + + +typing +------ + +* The undocumented keyword argument syntax for creating + :class:`~typing.NamedTuple` classes (for example, + ``Point = NamedTuple("Point", x=int, y=int)``) is no longer supported. 
+ Use the class-based syntax or the functional syntax instead. + (Contributed by Bénédikt Tran in :gh:`133817`.) + +* Using ``TD = TypedDict("TD")`` or ``TD = TypedDict("TD", None)`` to + construct a :class:`~typing.TypedDict` type with zero fields is no + longer supported. Use ``class TD(TypedDict): pass`` + or ``TD = TypedDict("TD", {})`` instead. + (Contributed by Bénédikt Tran in :gh:`133823`.) + + +wave +---- + +* Removed the ``getmark()``, ``setmark()`` and ``getmarkers()`` methods + of the :class:`~wave.Wave_read` and :class:`~wave.Wave_write` classes, + which were deprecated since Python 3.13. + (Contributed by Bénédikt Tran in :gh:`133873`.) + + +Porting to Python 3.15 +====================== + +This section lists previously described changes and other bugfixes +that may require changes to your code. + + +Build changes +============= + + +C API changes +============= + +New features +------------ + +* TODO + +Porting to Python 3.15 +---------------------- + +* :class:`sqlite3.Connection` APIs have been cleaned up. + + * All parameters of :func:`sqlite3.connect` except *database* are now keyword-only. + * The first three parameters of methods :meth:`~sqlite3.Connection.create_function` + and :meth:`~sqlite3.Connection.create_aggregate` are now positional-only. + * The first parameter of methods :meth:`~sqlite3.Connection.set_authorizer`, + :meth:`~sqlite3.Connection.set_progress_handler` and + :meth:`~sqlite3.Connection.set_trace_callback` is now positional-only. + + (Contributed by Serhiy Storchaka in :gh:`133595`.) + +Deprecated C APIs +----------------- + +* TODO + +.. Add C API deprecations above alphabetically, not here at the end. + +Removed C APIs +-------------- + +* Remove deprecated ``PyUnicode`` functions: + + * :c:func:`!PyUnicode_AsDecodedObject`: + Use :c:func:`PyCodec_Decode` instead.
+ * :c:func:`!PyUnicode_AsDecodedUnicode`: + Use :c:func:`PyCodec_Decode` instead; note that some codecs (for example, "base64") + may return a type other than :class:`str`, such as :class:`bytes`. + * :c:func:`!PyUnicode_AsEncodedObject`: + Use :c:func:`PyCodec_Encode` instead. + * :c:func:`!PyUnicode_AsEncodedUnicode`: + Use :c:func:`PyCodec_Encode` instead; note that some codecs (for example, "base64") + may return a type other than :class:`bytes`, such as :class:`str`. + + (Contributed by Stan Ulbrych in :gh:`133612`.) + +* :c:func:`!PyImport_ImportModuleNoBlock`: deprecated alias + of :c:func:`PyImport_ImportModule`. + (Contributed by Bénédikt Tran in :gh:`133644`.) + +The following functions are removed in favor of :c:func:`PyConfig_Get`. +The |pythoncapi_compat_project| can be used to get :c:func:`!PyConfig_Get` +on Python 3.13 and older. + +* Python initialization functions: + + * :c:func:`!Py_GetExecPrefix`: + use :c:func:`PyConfig_Get("base_exec_prefix") ` + (:data:`sys.base_exec_prefix`) instead. + Use :c:func:`PyConfig_Get("exec_prefix") ` + (:data:`sys.exec_prefix`) if :ref:`virtual environments ` + need to be handled. + * :c:func:`!Py_GetPath`: + use :c:func:`PyConfig_Get("module_search_paths") ` + (:data:`sys.path`) instead. + * :c:func:`!Py_GetPrefix`: + use :c:func:`PyConfig_Get("base_prefix") ` + (:data:`sys.base_prefix`) instead. + Use :c:func:`PyConfig_Get("prefix") ` + (:data:`sys.prefix`) if :ref:`virtual environments ` + need to be handled. + * :c:func:`!Py_GetProgramFullPath`: + use :c:func:`PyConfig_Get("executable") ` + (:data:`sys.executable`) instead. + * :c:func:`!Py_GetProgramName`: + use :c:func:`PyConfig_Get("executable") ` + (:data:`sys.executable`) instead. + * :c:func:`!Py_GetPythonHome`: + use :c:func:`PyConfig_Get("home") ` or the + :envvar:`PYTHONHOME` environment variable instead. + + (Contributed by Bénédikt Tran in :gh:`133644`.) + +.. |pythoncapi_compat_project| replace:: |pythoncapi_compat_project_link|_ +..
|pythoncapi_compat_project_link| replace:: pythoncapi-compat project +.. _pythoncapi_compat_project_link: https://github.com/python/pythoncapi-compat diff --git a/Doc/whatsnew/3.3.rst b/Doc/whatsnew/3.3.rst index 7a8eb47cbdb..89fd6868645 100644 --- a/Doc/whatsnew/3.3.rst +++ b/Doc/whatsnew/3.3.rst @@ -829,7 +829,7 @@ Previous versions of CPython have always relied on a global import lock. This led to unexpected annoyances, such as deadlocks when importing a module would trigger code execution in a different thread as a side-effect. Clumsy workarounds were sometimes employed, such as the -:c:func:`PyImport_ImportModuleNoBlock` C API function. +:c:func:`!PyImport_ImportModuleNoBlock` C API function. In Python 3.3, importing a module takes a per-module lock. This correctly serializes importation of a given module from multiple threads (preventing diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 7aca35b2959..bc2eb1d0e26 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -1629,8 +1629,8 @@ Build and C API Changes (Contributed by Pablo Galindo in :issue:`37221`.) * :c:func:`!Py_SetPath` now sets :data:`sys.executable` to the program full - path (:c:func:`Py_GetProgramFullPath`) rather than to the program name - (:c:func:`Py_GetProgramName`). + path (:c:func:`!Py_GetProgramFullPath`) rather than to the program name + (:c:func:`!Py_GetProgramName`). (Contributed by Victor Stinner in :issue:`38234`.) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index b062e6b4c9b..896e8f4a489 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1161,7 +1161,8 @@ Changes in the C API .. 
code-block:: c int - foo_traverse(foo_struct *self, visitproc visit, void *arg) { + foo_traverse(PyObject *self, visitproc visit, void *arg) + { // Rest of the traverse function #if PY_VERSION_HEX >= 0x03090000 // This was not needed before Python 3.9 (Python issue 35810 and 40217) diff --git a/Doc/whatsnew/index.rst b/Doc/whatsnew/index.rst index 6ff722a1894..38194db670b 100644 --- a/Doc/whatsnew/index.rst +++ b/Doc/whatsnew/index.rst @@ -11,6 +11,7 @@ anyone wishing to stay up-to-date after a new release. .. toctree:: :maxdepth: 2 + 3.15.rst 3.14.rst 3.13.rst 3.12.rst diff --git a/Grammar/Tokens b/Grammar/Tokens index 20bb803b7d5..0547e6ed08f 100644 --- a/Grammar/Tokens +++ b/Grammar/Tokens @@ -1,3 +1,8 @@ +# When adding new tokens, remember to update the PEG generator in +# Tools/peg_generator/pegen/parser_generator.py +# This will ensure that older versions of Python can generate a Python parser +# using "python -m pegen python ". + ENDMARKER NAME NUMBER @@ -62,6 +67,9 @@ SOFT_KEYWORD FSTRING_START FSTRING_MIDDLE FSTRING_END +TSTRING_START +TSTRING_MIDDLE +TSTRING_END COMMENT NL ERRORTOKEN diff --git a/Grammar/python.gram b/Grammar/python.gram index de2d9c7508f..de435537095 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -79,7 +79,7 @@ _PyPegen_parse(Parser *p) # ~ # Commit to the current alternative, even if it fails to parse. # &&e -# Eager parse e. The parser will not backtrack and will immediately +# Eager parse e. The parser will not backtrack and will immediately # fail with SyntaxError if e cannot be parsed. 
# @@ -94,12 +94,18 @@ func_type[mod_ty]: '(' a=[type_expressions] ')' '->' b=expression NEWLINE* ENDMA # GENERAL STATEMENTS # ================== -statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) } +statements[asdl_stmt_seq*]: a=statement+ { _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a)) } -statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | a[asdl_stmt_seq*]=simple_stmts { a } +statement[asdl_stmt_seq*]: + | a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } + | a[asdl_stmt_seq*]=simple_stmts { a } + +single_compound_stmt[asdl_stmt_seq*]: + | a=compound_stmt { + _PyPegen_register_stmts(p, (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a)) } statement_newline[asdl_stmt_seq*]: - | a=compound_stmt NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } + | a=single_compound_stmt NEWLINE { a } | simple_stmts | NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, CHECK(stmt_ty, _PyAST_Pass(EXTRA))) } | ENDMARKER { _PyPegen_interactive_exit(p) } @@ -227,15 +233,17 @@ import_from_targets[asdl_alias_seq*]: import_from_as_names[asdl_alias_seq*]: | a[asdl_alias_seq*]=','.import_from_as_name+ { a } import_from_as_name[alias_ty]: - | a=NAME b=['as' z=NAME { z }] { _PyAST_alias(a->v.Name.id, - (b) ? ((expr_ty) b)->v.Name.id : NULL, - EXTRA) } + | invalid_import_from_as_name + | a=NAME b=['as' z=NAME { z }] { _PyAST_alias( + a->v.Name.id, (b) ? ((expr_ty) b)->v.Name.id : NULL, EXTRA) } + dotted_as_names[asdl_alias_seq*]: | a[asdl_alias_seq*]=','.dotted_as_name+ { a } dotted_as_name[alias_ty]: - | a=dotted_name b=['as' z=NAME { z }] { _PyAST_alias(a->v.Name.id, - (b) ? ((expr_ty) b)->v.Name.id : NULL, - EXTRA) } + | invalid_dotted_as_name + | a=dotted_name b=['as' z=NAME { z }] { _PyAST_alias( + a->v.Name.id, (b) ? ((expr_ty) b)->v.Name.id : NULL, EXTRA) } + dotted_name[expr_ty]: | a=dotted_name '.' 
b=NAME { _PyPegen_join_names_with_dot(p, a, b) } | NAME @@ -435,18 +443,30 @@ try_stmt[stmt_ty]: except_block[excepthandler_ty]: | invalid_except_stmt_indent - | 'except' e=expressions ':' b=block { - _PyAST_ExceptHandler(e, NULL, b, EXTRA) } - | 'except' e=expression 'as' t=NAME ':' b=block { + | 'except' e=expression ':' b=block { + _PyAST_ExceptHandler(e, NULL, b, EXTRA) } + | 'except' e=expression 'as' t=NAME ':' b=block { _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } + | 'except' e=expressions ':' b=block { + CHECK_VERSION( + excepthandler_ty, + 14, + "except expressions without parentheses are", + _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | 'except' ':' b=block { _PyAST_ExceptHandler(NULL, NULL, b, EXTRA) } | invalid_except_stmt except_star_block[excepthandler_ty]: | invalid_except_star_stmt_indent - | 'except' '*' e=expressions ':' b=block { + | 'except' '*' e=expression ':' b=block { _PyAST_ExceptHandler(e, NULL, b, EXTRA) } | 'except' '*' e=expression 'as' t=NAME ':' b=block { _PyAST_ExceptHandler(e, ((expr_ty) t)->v.Name.id, b, EXTRA) } + | 'except' '*' e=expressions ':' b=block { + CHECK_VERSION( + excepthandler_ty, + 14, + "except expressions without parentheses are", + _PyAST_ExceptHandler(e, NULL, b, EXTRA)) } | invalid_except_star_stmt finally_block[asdl_stmt_seq*]: | invalid_finally_stmt @@ -513,7 +533,7 @@ literal_pattern[pattern_ty]: literal_expr[expr_ty]: | signed_number !('+' | '-') | complex_number - | strings + | &(STRING|FSTRING_START|TSTRING_START) strings | 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) } | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } @@ -653,7 +673,7 @@ type_alias[stmt_ty]: # Type parameter declaration # -------------------------- -type_params[asdl_type_param_seq*]: +type_params[asdl_type_param_seq*]: | invalid_type_params | '[' t=type_param_seq ']' { CHECK_VERSION(asdl_type_param_seq *, 12, "Type parameter lists are", t) } @@ -853,7 +873,7 @@ 
atom[expr_ty]: | 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) } | 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) } | 'None' { _PyAST_Constant(Py_None, NULL, EXTRA) } - | &(STRING|FSTRING_START) strings + | &(STRING|FSTRING_START|TSTRING_START) strings | NUMBER | &'(' (tuple | group | genexp) | &'[' (list | listcomp) @@ -929,7 +949,7 @@ fstring_middle[expr_ty]: fstring_replacement_field[expr_ty]: | '{' a=annotated_rhs debug_expr='='? conversion=[fstring_conversion] format=[fstring_full_format_spec] rbrace='}' { _PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) } - | invalid_replacement_field + | invalid_fstring_replacement_field fstring_conversion[ResultTokenWithMetadata*]: | conv_token="!" conv=NAME { _PyPegen_check_fstring_conversion(p, conv_token, conv) } fstring_full_format_spec[ResultTokenWithMetadata*]: @@ -940,8 +960,32 @@ fstring_format_spec[expr_ty]: fstring[expr_ty]: | a=FSTRING_START b=fstring_middle* c=FSTRING_END { _PyPegen_joined_str(p, a, (asdl_expr_seq*)b, c) } +tstring_format_spec_replacement_field[expr_ty]: + | '{' a=annotated_rhs debug_expr='='? conversion=[fstring_conversion] format=[tstring_full_format_spec] rbrace='}' { + _PyPegen_formatted_value(p, a, debug_expr, conversion, format, rbrace, EXTRA) } + | invalid_tstring_replacement_field +tstring_format_spec[expr_ty]: + | t=TSTRING_MIDDLE { _PyPegen_decoded_constant_from_token(p, t) } + | tstring_format_spec_replacement_field +tstring_full_format_spec[ResultTokenWithMetadata*]: + | colon=':' spec=tstring_format_spec* { _PyPegen_setup_full_format_spec(p, colon, (asdl_expr_seq *) spec, EXTRA) } +tstring_replacement_field[expr_ty]: + | '{' a=annotated_rhs debug_expr='='? 
conversion=[fstring_conversion] format=[tstring_full_format_spec] rbrace='}' { + _PyPegen_interpolation(p, a, debug_expr, conversion, format, rbrace, EXTRA) } + | invalid_tstring_replacement_field +tstring_middle[expr_ty]: + | tstring_replacement_field + | t=TSTRING_MIDDLE { _PyPegen_constant_from_token(p, t) } +tstring[expr_ty] (memo): + | a=TSTRING_START b=tstring_middle* c=TSTRING_END { + CHECK_VERSION( + expr_ty, + 14, + "t-strings are", + _PyPegen_template_str(p, a, (asdl_expr_seq*)b, c)) } + string[expr_ty]: s[Token*]=STRING { _PyPegen_constant_from_string(p, s) } -strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string)+ { _PyPegen_concatenate_strings(p, a, EXTRA) } +strings[expr_ty] (memo): a[asdl_expr_seq*]=(fstring|string|tstring)+ { _PyPegen_concatenate_strings(p, a, EXTRA) } list[expr_ty]: | '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) } @@ -1206,6 +1250,8 @@ invalid_expression: RAISE_SYNTAX_ERROR_KNOWN_LOCATION (a, "expected expression before 'if', but statement is given") } | a='lambda' [lambda_params] b=':' &FSTRING_MIDDLE { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "f-string: lambda expressions are not allowed without parentheses") } + | a='lambda' [lambda_params] b=':' &TSTRING_MIDDLE { + RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "t-string: lambda expressions are not allowed without parentheses") } invalid_named_expression(memo): | a=expression ':=' expression { @@ -1259,7 +1305,7 @@ invalid_dict_comprehension: RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "dict unpacking cannot be used in dict comprehension") } invalid_parameters: | a="/" ',' { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "at least one argument must precede /") } + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "at least one parameter must precede /") } | (slash_no_default | slash_with_default) param_maybe_default* a='/' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "/ may appear only once") } | slash_no_default? 
param_no_default* invalid_parameters_helper a=param_no_default { @@ -1273,21 +1319,21 @@ invalid_parameters: invalid_default: | a='=' &(')'|',') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "expected default value expression") } invalid_star_etc: - | a='*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "named arguments must follow bare *") } + | a='*' (')' | ',' (')' | '**')) { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "named parameters must follow bare *") } | '*' ',' TYPE_COMMENT { RAISE_SYNTAX_ERROR("bare * has associated type comment") } - | '*' param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-positional argument cannot have default value") } + | '*' param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-positional parameter cannot have default value") } | '*' (param_no_default | ',') param_maybe_default* a='*' (param_no_default | ',') { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "* argument may appear only once") } + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "* may appear only once") } invalid_kwds: - | '**' param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-keyword argument cannot have default value") } - | '**' param ',' a=param { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "arguments cannot follow var-keyword argument") } - | '**' param ',' a[Token*]=('*'|'**'|'/') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "arguments cannot follow var-keyword argument") } + | '**' param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-keyword parameter cannot have default value") } + | '**' param ',' a=param { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "parameters cannot follow var-keyword parameter") } + | '**' param ',' a[Token*]=('*'|'**'|'/') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "parameters cannot follow var-keyword parameter") } invalid_parameters_helper: # This is only there to avoid type errors | a=slash_with_default { _PyPegen_singleton_seq(p, a) } | param_with_default+ invalid_lambda_parameters: | a="/" ',' { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "at least one 
argument must precede /") } + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "at least one parameter must precede /") } | (lambda_slash_no_default | lambda_slash_with_default) lambda_param_maybe_default* a='/' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "/ may appear only once") } | lambda_slash_no_default? lambda_param_no_default* invalid_lambda_parameters_helper a=lambda_param_no_default { @@ -1302,14 +1348,14 @@ invalid_lambda_parameters_helper: | a=lambda_slash_with_default { _PyPegen_singleton_seq(p, a) } | lambda_param_with_default+ invalid_lambda_star_etc: - | '*' (':' | ',' (':' | '**')) { RAISE_SYNTAX_ERROR("named arguments must follow bare *") } - | '*' lambda_param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-positional argument cannot have default value") } + | '*' (':' | ',' (':' | '**')) { RAISE_SYNTAX_ERROR("named parameters must follow bare *") } + | '*' lambda_param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-positional parameter cannot have default value") } | '*' (lambda_param_no_default | ',') lambda_param_maybe_default* a='*' (lambda_param_no_default | ',') { - RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "* argument may appear only once") } + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "* may appear only once") } invalid_lambda_kwds: - | '**' lambda_param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-keyword argument cannot have default value") } - | '**' lambda_param ',' a=lambda_param { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "arguments cannot follow var-keyword argument") } - | '**' lambda_param ',' a[Token*]=('*'|'**'|'/') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "arguments cannot follow var-keyword argument") } + | '**' lambda_param a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "var-keyword parameter cannot have default value") } + | '**' lambda_param ',' a=lambda_param { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "parameters cannot follow var-keyword parameter") } + | '**' lambda_param ',' a[Token*]=('*'|'**'|'/') { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "parameters 
cannot follow var-keyword parameter") } invalid_double_type_comments: | TYPE_COMMENT NEWLINE TYPE_COMMENT NEWLINE INDENT { RAISE_SYNTAX_ERROR("Cannot have two type comments on def") } @@ -1333,13 +1379,21 @@ invalid_group: invalid_import: | a='import' ','.dotted_name+ 'from' dotted_name { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "Did you mean to use 'from ... import ...' instead?") } - | 'import' token=NEWLINE { + | 'import' token=NEWLINE { RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } +invalid_dotted_as_name: + | dotted_name 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + "cannot use %s as import target", _PyPegen_get_expr_name(a)) } +invalid_import_from_as_name: + | NAME 'as' !(NAME (',' | ')' | NEWLINE)) a=expression { + RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, + "cannot use %s as import target", _PyPegen_get_expr_name(a)) } invalid_import_from_targets: | import_from_as_names ',' NEWLINE { RAISE_SYNTAX_ERROR("trailing comma not allowed without surrounding parentheses") } - | token=NEWLINE { + | token=NEWLINE { RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } invalid_with_stmt: @@ -1364,7 +1418,7 @@ invalid_except_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } - | 'except' expression 'as' a=expression { + | 'except' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except statement with %s", _PyPegen_get_expr_name(a)) } invalid_except_star_stmt: @@ -1372,7 +1426,7 @@ invalid_except_star_stmt: RAISE_SYNTAX_ERROR_STARTING_FROM(a, "multiple exception types must be parenthesized when using 'as'") } | a='except' '*' expression ['as' NAME ] NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='except' '*' (NEWLINE | 
':') { RAISE_SYNTAX_ERROR("expected one or more exception types") } - | 'except' '*' expression 'as' a=expression { + | 'except' '*' expression 'as' a=expression ':' block { RAISE_SYNTAX_ERROR_KNOWN_LOCATION( a, "cannot use except* statement with %s", _PyPegen_get_expr_name(a)) } invalid_finally_stmt: @@ -1416,6 +1470,7 @@ invalid_elif_stmt: invalid_else_stmt: | a='else' ':' NEWLINE !INDENT { RAISE_INDENTATION_ERROR("expected an indented block after 'else' statement on line %d", a->lineno) } + | 'else' ':' block 'elif' { RAISE_SYNTAX_ERROR("'elif' block follows an 'else' block")} invalid_while_stmt: | 'while' named_expression NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | a='while' named_expression ':' NEWLINE !INDENT { @@ -1447,17 +1502,17 @@ invalid_starred_expression_unpacking: invalid_starred_expression: | '*' { RAISE_SYNTAX_ERROR("Invalid star expression") } -invalid_replacement_field: +invalid_fstring_replacement_field: | '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '='") } | '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '!'") } | '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before ':'") } | '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "f-string: valid expression required before '}'") } - | '{' !annotated_rhs { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting a valid expression after '{'")} + | '{' !annotated_rhs { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting a valid expression after '{'") } | '{' annotated_rhs !('=' | '!' | ':' | '}') { PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '=', or '!', or ':', or '}'") } | '{' annotated_rhs '=' !('!' | ':' | '}') { PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '!', or ':', or '}'") } - | '{' annotated_rhs '='? invalid_conversion_character + | '{' annotated_rhs '='? 
invalid_fstring_conversion_character | '{' annotated_rhs '='? ['!' NAME] !(':' | '}') { PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting ':' or '}'") } | '{' annotated_rhs '='? ['!' NAME] ':' fstring_format_spec* !'}' { @@ -1465,10 +1520,32 @@ invalid_replacement_field: | '{' annotated_rhs '='? ['!' NAME] !'}' { PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: expecting '}'") } -invalid_conversion_character: +invalid_fstring_conversion_character: | '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: missing conversion character") } | '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("f-string: invalid conversion character") } +invalid_tstring_replacement_field: + | '{' a='=' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "t-string: valid expression required before '='") } + | '{' a='!' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "t-string: valid expression required before '!'") } + | '{' a=':' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "t-string: valid expression required before ':'") } + | '{' a='}' { RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "t-string: valid expression required before '}'") } + | '{' !annotated_rhs { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting a valid expression after '{'") } + | '{' annotated_rhs !('=' | '!' | ':' | '}') { + PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting '=', or '!', or ':', or '}'") } + | '{' annotated_rhs '=' !('!' | ':' | '}') { + PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting '!', or ':', or '}'") } + | '{' annotated_rhs '='? invalid_tstring_conversion_character + | '{' annotated_rhs '='? ['!' NAME] !(':' | '}') { + PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting ':' or '}'") } + | '{' annotated_rhs '='? ['!' NAME] ':' fstring_format_spec* !'}' { + PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting '}', or format specs") } + | '{' annotated_rhs '='? ['!' 
NAME] !'}' { + PyErr_Occurred() ? NULL : RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: expecting '}'") } + +invalid_tstring_conversion_character: + | '!' &(':' | '}') { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: missing conversion character") } + | '!' !NAME { RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("t-string: invalid conversion character") } + invalid_arithmetic: | sum ('+'|'-'|'*'|'/'|'%'|'//'|'@') a='not' b=inversion { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "'not' after an operator must be parenthesized") } invalid_factor: @@ -1477,5 +1554,5 @@ invalid_factor: invalid_type_params: | '[' token=']' { RAISE_SYNTAX_ERROR_STARTING_FROM( - token, + token, "Type parameter list cannot be empty")} diff --git a/Include/Python.h b/Include/Python.h index 64be8014589..f34d581f0b4 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -59,6 +59,14 @@ # include // __readgsqword() #endif +// Suppress known warnings in Python header files. +#if defined(_MSC_VER) +// Warning that alignas behaviour has changed. Doesn't affect us, because we +// never relied on the old behaviour. +#pragma warning(push) +#pragma warning(disable: 5274) +#endif + // Include Python header files #include "pyport.h" #include "pymacro.h" @@ -138,4 +146,9 @@ #include "cpython/pyfpe.h" #include "cpython/tracemalloc.h" +// Restore warning filter +#ifdef _MSC_VER +#pragma warning(pop) +#endif + #endif /* !Py_PYTHON_H */ diff --git a/Include/cpython/complexobject.h b/Include/cpython/complexobject.h index 28576afad0b..fbdc6a91fe8 100644 --- a/Include/cpython/complexobject.h +++ b/Include/cpython/complexobject.h @@ -9,16 +9,10 @@ typedef struct { // Operations on complex numbers. 
PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) _Py_cr_sum(Py_complex, double); PyAPI_FUNC(Py_complex) _Py_c_diff(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) _Py_cr_diff(Py_complex, double); -PyAPI_FUNC(Py_complex) _Py_rc_diff(double, Py_complex); PyAPI_FUNC(Py_complex) _Py_c_neg(Py_complex); PyAPI_FUNC(Py_complex) _Py_c_prod(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) _Py_cr_prod(Py_complex, double); PyAPI_FUNC(Py_complex) _Py_c_quot(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) _Py_cr_quot(Py_complex, double); -PyAPI_FUNC(Py_complex) _Py_rc_quot(double, Py_complex); PyAPI_FUNC(Py_complex) _Py_c_pow(Py_complex, Py_complex); PyAPI_FUNC(double) _Py_c_abs(Py_complex); diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h index 598cd330bc9..18249b95bef 100644 --- a/Include/cpython/funcobject.h +++ b/Include/cpython/funcobject.h @@ -97,6 +97,11 @@ static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) { } #define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func)) +static inline PyObject* PyFunction_GET_BUILTINS(PyObject *func) { + return _PyFunction_CAST(func)->func_builtins; +} +#define PyFunction_GET_BUILTINS(func) PyFunction_GET_BUILTINS(_PyObject_CAST(func)) + static inline PyObject* PyFunction_GET_MODULE(PyObject *func) { return _PyFunction_CAST(func)->func_module; } diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 7f28ad60b74..32e6fd73fb5 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -7,57 +7,6 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); -#define Py_ASNATIVEBYTES_DEFAULTS -1 -#define Py_ASNATIVEBYTES_BIG_ENDIAN 0 -#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1 -#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3 -#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4 -#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8 -#define Py_ASNATIVEBYTES_ALLOW_INDEX 16 - -/* PyLong_AsNativeBytes: Copy the integer value to 
a native variable. - buffer points to the first byte of the variable. - n_bytes is the number of bytes available in the buffer. Pass 0 to request - the required size for the value. - flags is a bitfield of the following flags: - * 1 - little endian - * 2 - native endian - * 4 - unsigned destination (e.g. don't reject copying 255 into one byte) - * 8 - raise an exception for negative inputs - * 16 - call __index__ on non-int types - If flags is -1 (all bits set), native endian is used, value truncation - behaves most like C (allows negative inputs and allow MSB set), and non-int - objects will raise a TypeError. - Big endian mode will write the most significant byte into the address - directly referenced by buffer; little endian will write the least significant - byte into that address. - - If an exception is raised, returns a negative value. - Otherwise, returns the number of bytes that are required to store the value. - To check that the full value is represented, ensure that the return value is - equal or less than n_bytes. - All n_bytes are guaranteed to be written (unless an exception occurs), and - so ignoring a positive return value is the equivalent of a downcast in C. - In cases where the full value could not be represented, the returned value - may be larger than necessary - this function is not an accurate way to - calculate the bit length of an integer object. - */ -PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, - Py_ssize_t n_bytes, int flags); - -/* PyLong_FromNativeBytes: Create an int value from a native integer - n_bytes is the number of bytes to read from the buffer. Passing 0 will - always produce the zero int. - PyLong_FromUnsignedNativeBytes always produces a non-negative int. - flags is the same as for PyLong_AsNativeBytes, but only supports selecting - the endianness or forcing an unsigned buffer. - - Returns the int object, or NULL with an exception set. 
*/ -PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, - int flags); -PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, - size_t n_bytes, int flags); - PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); diff --git a/Include/cpython/object.h b/Include/cpython/object.h index e2300aee7a2..973d358ed8e 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -429,81 +429,14 @@ PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed( const char *function); -/* Trashcan mechanism, thanks to Christian Tismer. - -When deallocating a container object, it's possible to trigger an unbounded -chain of deallocations, as each Py_DECREF in turn drops the refcount on "the -next" object in the chain to 0. This can easily lead to stack overflows, -especially in threads (which typically have less stack space to work with). - -A container object can avoid this by bracketing the body of its tp_dealloc -function with a pair of macros: - -static void -mytype_dealloc(mytype *p) -{ - ... declarations go here ... - - PyObject_GC_UnTrack(p); // must untrack first - Py_TRASHCAN_BEGIN(p, mytype_dealloc) - ... The body of the deallocator goes here, including all calls ... - ... to Py_DECREF on contained objects. ... - Py_TRASHCAN_END // there should be no code after this -} - -CAUTION: Never return from the middle of the body! If the body needs to -"get out early", put a label immediately before the Py_TRASHCAN_END -call, and goto it. Else the call-depth counter (see below) will stay -above 0 forever, and the trashcan will never get emptied. - -How it works: The BEGIN macro increments a call-depth counter. So long -as this counter is small, the body of the deallocator is run directly without -further ado. 
But if the counter gets large, it instead adds p to a list of -objects to be deallocated later, skips the body of the deallocator, and -resumes execution after the END macro. The tp_dealloc routine then returns -without deallocating anything (and so unbounded call-stack depth is avoided). - -When the call stack finishes unwinding again, code generated by the END macro -notices this, and calls another routine to deallocate all the objects that -may have been added to the list of deferred deallocations. In effect, a -chain of N deallocations is broken into (N-1)/(Py_TRASHCAN_HEADROOM-1) pieces, -with the call stack never exceeding a depth of Py_TRASHCAN_HEADROOM. - -Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base -class, we need to ensure that the trashcan is only triggered on the tp_dealloc -of the actual class being deallocated. Otherwise we might end up with a -partially-deallocated object. To check this, the tp_dealloc function must be -passed as second argument to Py_TRASHCAN_BEGIN(). -*/ - - PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); - -/* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */ - -/* To avoid raising recursion errors during dealloc trigger trashcan before we reach - * recursion limit. To avoid trashing, we don't attempt to empty the trashcan until - * we have headroom above the trigger limit */ -#define Py_TRASHCAN_HEADROOM 50 - -/* Helper function for Py_TRASHCAN_BEGIN */ PyAPI_FUNC(int) _Py_ReachedRecursionLimitWithMargin(PyThreadState *tstate, int margin_count); -#define Py_TRASHCAN_BEGIN(op, dealloc) \ -do { \ - PyThreadState *tstate = PyThreadState_Get(); \ - if (_Py_ReachedRecursionLimitWithMargin(tstate, 2) && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ - _PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ - break; \ - } - /* The body of the deallocator is here. 
*/ -#define Py_TRASHCAN_END \ - if (tstate->delete_later && !_Py_ReachedRecursionLimitWithMargin(tstate, 4)) { \ - _PyTrash_thread_destroy_chain(tstate); \ - } \ -} while (0); +/* For backwards compatibility with the old trashcan mechanism */ +#define Py_TRASHCAN_BEGIN(op, dealloc) +#define Py_TRASHCAN_END PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); @@ -543,6 +476,11 @@ PyAPI_FUNC(PyRefTracer) PyRefTracer_GetTracer(void**); */ PyAPI_FUNC(int) PyUnstable_Object_EnableDeferredRefcount(PyObject *); +/* Determine if the object exists as a unique temporary variable on the + * topmost frame of the interpreter. + */ +PyAPI_FUNC(int) PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *); + /* Check whether the object is immortal. This cannot fail. */ PyAPI_FUNC(int) PyUnstable_IsImmortal(PyObject *); @@ -551,3 +489,5 @@ PyAPI_FUNC(int) PyUnstable_IsImmortal(PyObject *); // before calling this function in order to avoid spurious failures. PyAPI_FUNC(int) PyUnstable_TryIncRef(PyObject *); PyAPI_FUNC(void) PyUnstable_EnableTryIncRef(PyObject *); + +PyAPI_FUNC(int) PyUnstable_Object_IsUniquelyReferenced(PyObject *); diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index b36b4681f5d..6b63d304b0d 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -30,6 +30,7 @@ typedef struct { PyObject *end_offset; PyObject *text; PyObject *print_file_and_line; + PyObject *metadata; } PySyntaxErrorObject; typedef struct { diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index c562426767c..7f1bc363861 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -8,8 +8,6 @@ PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int); -PyAPI_FUNC(PyObject *) PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *); - /* State unique per thread */ /* Py_tracefunc return -1 when raising an exception, or 0 for 
success. */ @@ -120,8 +118,6 @@ struct _ts { int py_recursion_remaining; int py_recursion_limit; - - int c_recursion_remaining; /* Retained for backwards compatibility. Do not use */ int recursion_headroom; /* Allow 50 more calls to handle any errors. */ /* 'tracing' keeps track of the execution depth when tracing/profiling. @@ -198,7 +194,7 @@ struct _ts { /* The thread's exception stack entry. (Always the last entry.) */ _PyErr_StackItem exc_state; - PyObject *previous_executor; + PyObject *current_executor; uint64_t dict_global_version; @@ -212,8 +208,6 @@ struct _ts { _PyRemoteDebuggerSupport remote_debugger_support; }; -# define Py_C_RECURSION_LIMIT 5000 - /* other API */ /* Similar to PyThreadState_Get(), but don't issue a fatal error diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 7c1459bde8f..cf830b6066f 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -29,9 +29,9 @@ # error "this header file must not be included directly" #endif -#define PYSTATS_MAX_UOP_ID 512 +#define PYSTATS_MAX_UOP_ID 1024 -#define SPECIALIZATION_FAILURE_KINDS 50 +#define SPECIALIZATION_FAILURE_KINDS 60 /* Stats for determining who is calling PyEval_EvalFrame */ #define EVAL_CALL_TOTAL 0 diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index e8b04d158b0..136f5d5c5f8 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -99,6 +99,11 @@ typedef struct { PyObject_HEAD Py_ssize_t length; /* Number of code points in the string */ Py_hash_t hash; /* Hash value; -1 if not set */ +#ifdef Py_GIL_DISABLED + /* Ensure 4 byte alignment for PyUnicode_DATA(), see gh-63736 on m68k. + In the non-free-threaded build, we'll use explicit padding instead */ + _Py_ALIGN_AS(4) +#endif struct { /* If interned is non-zero, the two references from the dictionary to this object are *not* counted in ob_refcnt. 
@@ -109,7 +114,12 @@ typedef struct { 3: Interned, Immortal, and Static This categorization allows the runtime to determine the right cleanup mechanism at runtime shutdown. */ - uint16_t interned; +#ifdef Py_GIL_DISABLED + // Needs to be accessed atomically, so can't be a bit field. + unsigned char interned; +#else + unsigned int interned:2; +#endif /* Character size: - PyUnicode_1BYTE_KIND (1): @@ -132,23 +142,23 @@ typedef struct { * all characters are in the range U+0000-U+10FFFF * at least one character is in the range U+10000-U+10FFFF */ - unsigned short kind:3; + unsigned int kind:3; /* Compact is with respect to the allocation scheme. Compact unicode objects only require one memory block while non-compact objects use one block for the PyUnicodeObject struct and another for its data buffer. */ - unsigned short compact:1; + unsigned int compact:1; /* The string only contains characters in the range U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is set, use the PyASCIIObject structure. */ - unsigned short ascii:1; + unsigned int ascii:1; /* The object is statically allocated. */ - unsigned short statically_allocated:1; + unsigned int statically_allocated:1; +#ifndef Py_GIL_DISABLED /* Padding to ensure that PyUnicode_DATA() is always aligned to - 4 bytes (see issue #19537 on m68k) and we use unsigned short to avoid - the extra four bytes on 32-bit Windows. This is restricted features - for specific compilers including GCC, MSVC, Clang and IBM's XL compiler. 
*/ - unsigned short :10; + 4 bytes (see issue gh-63736 on m68k) */ + unsigned int :24; +#endif } state; } PyASCIIObject; @@ -198,7 +208,7 @@ typedef struct { /* Use only if you know it's a string */ static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { #ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint16_relaxed(&_PyASCIIObject_CAST(op)->state.interned); + return _Py_atomic_load_uint8_relaxed(&_PyASCIIObject_CAST(op)->state.interned); #else return _PyASCIIObject_CAST(op)->state.interned; #endif diff --git a/Include/cpython/warnings.h b/Include/cpython/warnings.h index 4e3eb88e8ff..8731fd2e96b 100644 --- a/Include/cpython/warnings.h +++ b/Include/cpython/warnings.h @@ -18,3 +18,9 @@ PyAPI_FUNC(int) PyErr_WarnExplicitFormat( // DEPRECATED: Use PyErr_WarnEx() instead. #define PyErr_Warn(category, msg) PyErr_WarnEx((category), (msg), 1) + +int _PyErr_WarnExplicitObjectWithContext( + PyObject *category, + PyObject *message, + PyObject *filename, + int lineno); diff --git a/Include/import.h b/Include/import.h index 24b23b91191..d91ebe96ca8 100644 --- a/Include/import.h +++ b/Include/import.h @@ -51,9 +51,6 @@ PyAPI_FUNC(PyObject *) PyImport_AddModuleRef( PyAPI_FUNC(PyObject *) PyImport_ImportModule( const char *name /* UTF-8 encoded string */ ); -Py_DEPRECATED(3.13) PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock( - const char *name /* UTF-8 encoded string */ - ); PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel( const char *name, /* UTF-8 encoded string */ PyObject *globals, diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index d97f51b8eef..71b7ea702d6 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -634,10 +634,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl mi_track_mem_defined(block,sizeof(mi_block_t)); mi_block_t* next; #ifdef MI_ENCODE_FREELIST - next = 
(mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed(&block->next), keys); #else MI_UNUSED(keys); MI_UNUSED(null); - next = (mi_block_t*)block->next; + next = (mi_block_t*)mi_atomic_load_relaxed(&block->next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); return next; @@ -646,10 +646,10 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST - block->next = mi_ptr_encode(null, next, keys); + mi_atomic_store_relaxed(&block->next, mi_ptr_encode(null, next, keys)); #else MI_UNUSED(keys); MI_UNUSED(null); - block->next = (mi_encoded_t)next; + mi_atomic_store_relaxed(&block->next, (mi_encoded_t)next); #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); } diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h index 354839ba955..4f77bd7bc52 100644 --- a/Include/internal/mimalloc/mimalloc/types.h +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -235,7 +235,7 @@ typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { - mi_encoded_t next; + _Atomic(mi_encoded_t) next; } mi_block_t; diff --git a/Include/internal/pycore_ast.h b/Include/internal/pycore_ast.h index 69abc3536e3..60367202bab 100644 --- a/Include/internal/pycore_ast.h +++ b/Include/internal/pycore_ast.h @@ -361,9 +361,10 @@ enum _expr_kind {BoolOp_kind=1, NamedExpr_kind=2, BinOp_kind=3, UnaryOp_kind=4, ListComp_kind=9, SetComp_kind=10, DictComp_kind=11, GeneratorExp_kind=12, Await_kind=13, Yield_kind=14, YieldFrom_kind=15, Compare_kind=16, Call_kind=17, - FormattedValue_kind=18, JoinedStr_kind=19, Constant_kind=20, - Attribute_kind=21, Subscript_kind=22, Starred_kind=23, - Name_kind=24, List_kind=25, Tuple_kind=26, Slice_kind=27}; + 
FormattedValue_kind=18, Interpolation_kind=19, + JoinedStr_kind=20, TemplateStr_kind=21, Constant_kind=22, + Attribute_kind=23, Subscript_kind=24, Starred_kind=25, + Name_kind=26, List_kind=27, Tuple_kind=28, Slice_kind=29}; struct _expr { enum _expr_kind kind; union { @@ -459,10 +460,21 @@ struct _expr { expr_ty format_spec; } FormattedValue; + struct { + expr_ty value; + constant str; + int conversion; + expr_ty format_spec; + } Interpolation; + struct { asdl_expr_seq *values; } JoinedStr; + struct { + asdl_expr_seq *values; + } TemplateStr; + struct { constant value; string kind; @@ -820,8 +832,14 @@ expr_ty _PyAST_Call(expr_ty func, asdl_expr_seq * args, asdl_keyword_seq * expr_ty _PyAST_FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_Interpolation(expr_ty value, constant str, int conversion, + expr_ty format_spec, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena + *arena); expr_ty _PyAST_JoinedStr(asdl_expr_seq * values, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_TemplateStr(asdl_expr_seq * values, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); expr_ty _PyAST_Constant(constant value, string kind, int lineno, int col_offset, int end_lineno, int end_col_offset, PyArena *arena); diff --git a/Include/internal/pycore_ast_state.h b/Include/internal/pycore_ast_state.h index 2f9d5ea366e..d4ac419f51d 100644 --- a/Include/internal/pycore_ast_state.h +++ b/Include/internal/pycore_ast_state.h @@ -75,6 +75,7 @@ struct ast_state { PyObject *In_singleton; PyObject *In_type; PyObject *Interactive_type; + PyObject *Interpolation_type; PyObject *Invert_singleton; PyObject *Invert_type; PyObject *IsNot_singleton; @@ -137,6 +138,7 @@ struct ast_state { PyObject *Sub_singleton; PyObject *Sub_type; PyObject *Subscript_type; + PyObject *TemplateStr_type; 
PyObject *TryStar_type; PyObject *Try_type; PyObject *Tuple_type; @@ -242,6 +244,7 @@ struct ast_state { PyObject *slice; PyObject *step; PyObject *stmt_type; + PyObject *str; PyObject *subject; PyObject *tag; PyObject *target; diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 942d8b107a7..454c8dde031 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -95,8 +95,10 @@ backoff_counter_triggers(_Py_BackoffCounter counter) return counter.value_and_backoff < UNREACHABLE_BACKOFF; } -/* Initial JUMP_BACKWARD counter. - * This determines when we create a trace for a loop. */ +// Initial JUMP_BACKWARD counter. +// Must be larger than ADAPTIVE_COOLDOWN_VALUE, otherwise when JIT code is +// invalidated we may construct a new trace before the bytecode has properly +// re-specialized: #define JUMP_BACKWARD_INITIAL_VALUE 4095 #define JUMP_BACKWARD_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h index 300e7f4896a..8ea9b3ebb88 100644 --- a/Include/internal/pycore_bytesobject.h +++ b/Include/internal/pycore_bytesobject.h @@ -20,8 +20,9 @@ extern PyObject* _PyBytes_FromHex( // Helper for PyBytes_DecodeEscape that detects invalid escape chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t, - const char *, const char **); +PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t, + const char *, + int *, const char **); // Substring Search. 
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 18c8bc0624f..3d8247df31c 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -196,25 +196,6 @@ extern void _PyEval_DeactivateOpCache(void); /* --- _Py_EnterRecursiveCall() ----------------------------------------- */ -#if !_Py__has_builtin(__builtin_frame_address) && !defined(_MSC_VER) -static uintptr_t return_pointer_as_int(char* p) { - return (uintptr_t)p; -} -#endif - -static inline uintptr_t -_Py_get_machine_stack_pointer(void) { -#if _Py__has_builtin(__builtin_frame_address) - return (uintptr_t)__builtin_frame_address(0); -#elif defined(_MSC_VER) - return (uintptr_t)_AddressOfReturnAddress(); -#else - char here; - /* Avoid compiler warning about returning stack address */ - return return_pointer_as_int(&here); -#endif -} - static inline int _Py_MakeRecCheck(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; @@ -249,12 +230,7 @@ PyAPI_FUNC(void) _Py_InitializeRecursionLimits(PyThreadState *tstate); static inline int _Py_ReachedRecursionLimit(PyThreadState *tstate) { uintptr_t here_addr = _Py_get_machine_stack_pointer(); _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; - if (here_addr > _tstate->c_stack_soft_limit) { - return 0; - } - if (_tstate->c_stack_hard_limit == 0) { - _Py_InitializeRecursionLimits(tstate); - } + assert(_tstate->c_stack_hard_limit != 0); return here_addr <= _tstate->c_stack_soft_limit; } @@ -279,6 +255,7 @@ PyAPI_DATA(const conversion_func) _PyEval_ConversionFuncs[]; typedef struct _special_method { PyObject *name; const char *error; + const char *error_suggestion; // improved optional suggestion } _Py_SpecialMethod; PyAPI_DATA(const _Py_SpecialMethod) _Py_SpecialMethods[]; @@ -309,6 +286,16 @@ PyAPI_FUNC(PyObject *) _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFra PyAPI_FUNC(int) 
_Py_Check_ArgsIterable(PyThreadState *tstate, PyObject *func, PyObject *args); +/* + * Indicate whether a special method of given 'oparg' can use the (improved) + * alternative error message instead. Only methods loaded by LOAD_SPECIAL + * support alternative error messages. + * + * Symbol is exported for the JIT (see discussion on GH-132218). + */ +PyAPI_FUNC(int) +_PyEval_SpecialMethodCanSuggest(PyObject *self, int oparg); + /* Bits that can be set in PyThreadState.eval_breaker */ #define _PY_GIL_DROP_REQUEST_BIT (1U << 0) #define _PY_SIGNALS_PENDING_BIT (1U << 1) @@ -362,6 +349,10 @@ PyAPI_FUNC(_PyStackRef) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyS #endif #endif +#if defined(Py_REMOTE_DEBUG) && defined(Py_SUPPORTS_REMOTE_DEBUG) +extern int _PyRunRemoteDebugger(PyThreadState *tstate); +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2839b9b7ebe..439989c60f6 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -177,12 +177,14 @@ typedef struct { */ // Note that these all fit within a byte, as do combinations. -// Later, we will use the smaller numbers to differentiate the different -// kinds of locals (e.g. pos-only arg, varkwargs, local-only). 
-#define CO_FAST_HIDDEN 0x10 -#define CO_FAST_LOCAL 0x20 -#define CO_FAST_CELL 0x40 -#define CO_FAST_FREE 0x80 +#define CO_FAST_ARG_POS (0x02) // pos-only, pos-or-kw, varargs +#define CO_FAST_ARG_KW (0x04) // kw-only, pos-or-kw, varkwargs +#define CO_FAST_ARG_VAR (0x08) // varargs, varkwargs +#define CO_FAST_ARG (CO_FAST_ARG_POS | CO_FAST_ARG_KW | CO_FAST_ARG_VAR) +#define CO_FAST_HIDDEN (0x10) +#define CO_FAST_LOCAL (0x20) +#define CO_FAST_CELL (0x40) +#define CO_FAST_FREE (0x80) typedef unsigned char _PyLocals_Kind; @@ -315,6 +317,7 @@ extern void _Py_Specialize_ForIter(_PyStackRef iter, _Py_CODEUNIT *instr, int op extern void _Py_Specialize_Send(_PyStackRef receiver, _Py_CODEUNIT *instr); extern void _Py_Specialize_ToBool(_PyStackRef value, _Py_CODEUNIT *instr); extern void _Py_Specialize_ContainsOp(_PyStackRef value, _Py_CODEUNIT *instr); +extern void _Py_GatherStats_GetIter(_PyStackRef iterable); // Utility functions for reading/writing 32/64-bit values in the inline caches. // Great care should be taken to ensure that these functions remain correct and @@ -431,8 +434,6 @@ write_location_entry_start(uint8_t *ptr, int code, int length) * On a specialization failure, the backoff counter is restarted. */ -#include "pycore_backoff.h" - // A value of 1 means that we attempt to specialize the *second* time each // instruction is executed. 
Executing twice is a much better indicator of // "hotness" than executing once, but additional warmup delays only prevent @@ -450,6 +451,9 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_BACKOFF 0 // Can't assert this in pycore_backoff.h because of header order dependencies +#if JUMP_BACKWARD_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE +# error "JIT threshold value should be larger than adaptive cooldown value" +#endif #if SIDE_EXIT_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE # error "Cold exit value should be larger than adaptive cooldown value" #endif @@ -561,6 +565,102 @@ extern void _Py_ClearTLBCIndex(_PyThreadStateImpl *tstate); extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp); #endif + +typedef struct { + int total; + struct co_locals_counts { + int total; + struct { + int total; + int numposonly; + int numposorkw; + int numkwonly; + int varargs; + int varkwargs; + } args; + int numpure; + struct { + int total; + // numargs does not contribute to locals.total. + int numargs; + int numothers; + } cells; + struct { + int total; + int numpure; + int numcells; + } hidden; + } locals; + int numfree; // nonlocal + struct co_unbound_counts { + int total; + struct { + int total; + int numglobal; + int numbuiltin; + int numunknown; + } globals; + int numattrs; + int numunknown; + } unbound; +} _PyCode_var_counts_t; + +PyAPI_FUNC(void) _PyCode_GetVarCounts( + PyCodeObject *, + _PyCode_var_counts_t *); +PyAPI_FUNC(int) _PyCode_SetUnboundVarCounts( + PyThreadState *, + PyCodeObject *, + _PyCode_var_counts_t *, + PyObject *globalnames, + PyObject *attrnames, + PyObject *globalsns, + PyObject *builtinsns); + + +/* "Stateless" code is a function or code object which does not rely on + * external state or internal state. It may rely on arguments and + * builtins, but not globals or a closure. Thus it does not rely + * on __globals__ or __closure__, and a stateless function + * is equivalent to its code object. 
+ * + * Stateless code also does not keep any persistent state + * of its own, so it can't have any executors, monitoring, + * instrumentation, or "extras" (i.e. co_extra). + * + * Stateless code may create nested functions, including closures. + * However, nested functions must themselves be stateless, except they + * *can* close on the enclosing locals. + * + * Stateless code may return any value, including nested functions and closures. + * + * Stateless code that takes no arguments and doesn't return anything + * may be treated like a script. + * + * We consider stateless code to be "portable" if it does not return + * any object that holds a reference to any of the code's locals. Thus + * generators and coroutines are not portable. Likewise a function + * that returns a closure is not portable. The concept of + * portability is useful in cases where the code is run + * in a different execution context than where + * the return value will be used. */ + +PyAPI_FUNC(int) _PyCode_CheckNoInternalState(PyCodeObject *, const char **); +PyAPI_FUNC(int) _PyCode_CheckNoExternalState( + PyCodeObject *, + _PyCode_var_counts_t *, + const char **); +PyAPI_FUNC(int) _PyCode_VerifyStateless( + PyThreadState *, + PyCodeObject *, + PyObject *globalnames, + PyObject *globalsns, + PyObject *builtinsns); + +PyAPI_FUNC(int) _PyCode_CheckPureFunction(PyCodeObject *, const char **); +PyAPI_FUNC(int) _PyCode_ReturnsOnlyNone(PyCodeObject *); + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h index b98dfb0cebb..aecc50be1e6 100644 --- a/Include/internal/pycore_compile.h +++ b/Include/internal/pycore_compile.h @@ -34,8 +34,8 @@ PyAPI_FUNC(PyCodeObject*) _PyAST_Compile( int optimize, struct _arena *arena); -/* AST optimizations */ -extern int _PyCompile_AstOptimize( +/* AST preprocessing */ +extern int _PyCompile_AstPreprocess( struct _mod *mod, PyObject *filename, PyCompilerFlags *flags, @@ -43,7 +43,7 @@ extern int 
_PyCompile_AstOptimize( struct _arena *arena, int syntax_check_only); -extern int _PyAST_Optimize( +extern int _PyAST_Preprocess( struct _mod *, struct _arena *arena, PyObject *filename, @@ -133,6 +133,8 @@ int _PyCompile_EnterScope(struct _PyCompiler *c, identifier name, int scope_type void _PyCompile_ExitScope(struct _PyCompiler *c); Py_ssize_t _PyCompile_AddConst(struct _PyCompiler *c, PyObject *o); _PyInstructionSequence *_PyCompile_InstrSequence(struct _PyCompiler *c); +int _PyCompile_StartAnnotationSetup(struct _PyCompiler *c); +int _PyCompile_EndAnnotationSetup(struct _PyCompiler *c); int _PyCompile_FutureFeatures(struct _PyCompiler *c); void _PyCompile_DeferredAnnotations( struct _PyCompiler *c, PyObject **deferred_annotations, diff --git a/Include/internal/pycore_complexobject.h b/Include/internal/pycore_complexobject.h index 54713536eed..f595f6ab7a6 100644 --- a/Include/internal/pycore_complexobject.h +++ b/Include/internal/pycore_complexobject.h @@ -19,6 +19,15 @@ extern int _PyComplex_FormatAdvancedWriter( Py_ssize_t start, Py_ssize_t end); +// Operations on complex numbers. +PyAPI_FUNC(Py_complex) _Py_cr_sum(Py_complex, double); +PyAPI_FUNC(Py_complex) _Py_cr_diff(Py_complex, double); +PyAPI_FUNC(Py_complex) _Py_rc_diff(double, Py_complex); +PyAPI_FUNC(Py_complex) _Py_cr_prod(Py_complex, double); +PyAPI_FUNC(Py_complex) _Py_cr_quot(Py_complex, double); +PyAPI_FUNC(Py_complex) _Py_rc_quot(double, Py_complex); + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index d3226d2b425..12729274138 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -57,7 +57,7 @@ struct _xidata { // likely a registered "xidatafunc", is responsible for // ensuring it owns the reference (i.e. incref). PyObject *obj; - // interp is the ID of the owning interpreter of the original + // interpid is the ID of the owning interpreter of the original // object. 
It corresponds to the active interpreter when // _PyObject_GetXIData() was called. This should only // be set by the cross-interpreter machinery. @@ -93,41 +93,6 @@ PyAPI_FUNC(void) _PyXIData_Free(_PyXIData_t *data); // Users should not need getters for "new_object" or "free". -/* getting cross-interpreter data */ - -typedef int (*xidatafunc)(PyThreadState *tstate, PyObject *, _PyXIData_t *); - -typedef struct _xid_lookup_state _PyXIData_lookup_t; - -typedef struct { - _PyXIData_lookup_t *global; - _PyXIData_lookup_t *local; - PyObject *PyExc_NotShareableError; -} _PyXIData_lookup_context_t; - -PyAPI_FUNC(int) _PyXIData_GetLookupContext( - PyInterpreterState *, - _PyXIData_lookup_context_t *); - -PyAPI_FUNC(xidatafunc) _PyXIData_Lookup( - _PyXIData_lookup_context_t *, - PyObject *); -PyAPI_FUNC(int) _PyObject_CheckXIData( - _PyXIData_lookup_context_t *, - PyObject *); -PyAPI_FUNC(int) _PyObject_GetXIData( - _PyXIData_lookup_context_t *, - PyObject *, - _PyXIData_t *); - - -/* using cross-interpreter data */ - -PyAPI_FUNC(PyObject *) _PyXIData_NewObject(_PyXIData_t *); -PyAPI_FUNC(int) _PyXIData_Release(_PyXIData_t *); -PyAPI_FUNC(int) _PyXIData_ReleaseAndRawFree(_PyXIData_t *); - - /* defining cross-interpreter data */ PyAPI_FUNC(void) _PyXIData_Init( @@ -138,7 +103,7 @@ PyAPI_FUNC(int) _PyXIData_InitWithSize( _PyXIData_t *, PyInterpreterState *interp, const size_t, PyObject *, xid_newobjfunc); -PyAPI_FUNC(void) _PyXIData_Clear( PyInterpreterState *, _PyXIData_t *); +PyAPI_FUNC(void) _PyXIData_Clear(PyInterpreterState *, _PyXIData_t *); // Normally the Init* functions are sufficient. 
The only time // additional initialization might be needed is to set the "free" func, @@ -147,6 +112,8 @@ PyAPI_FUNC(void) _PyXIData_Clear( PyInterpreterState *, _PyXIData_t *); do { \ (DATA)->free = (FUNC); \ } while (0) +#define _PyXIData_CHECK_FREE(DATA, FUNC) \ + ((DATA)->free == (FUNC)) // Additionally, some shareable types are essentially light wrappers // around other shareable types. The xidatafunc of the wrapper // can often be implemented by calling the wrapped object's @@ -158,6 +125,115 @@ PyAPI_FUNC(void) _PyXIData_Clear( PyInterpreterState *, _PyXIData_t *); do { \ (DATA)->new_object = (FUNC); \ } while (0) +#define _PyXIData_CHECK_NEW_OBJECT(DATA, FUNC) \ + ((DATA)->new_object == (FUNC)) + + +/* getting cross-interpreter data */ + +typedef int xidata_fallback_t; +#define _PyXIDATA_XIDATA_ONLY (0) +#define _PyXIDATA_FULL_FALLBACK (1) + +// Technically, we don't need two different function types; +// we could go with just the fallback one. However, only container +// types like tuple need it, so always having the extra arg would be +// a bit unfortunate. It's also nice to be able to clearly distinguish +// between types that might call _PyObject_GetXIData() and those that won't. 
+// +typedef int (*xidatafunc)(PyThreadState *, PyObject *, _PyXIData_t *); +typedef int (*xidatafbfunc)( + PyThreadState *, PyObject *, xidata_fallback_t, _PyXIData_t *); +typedef struct { + xidatafunc basic; + xidatafbfunc fallback; +} _PyXIData_getdata_t; + +PyAPI_FUNC(PyObject *) _PyXIData_GetNotShareableErrorType(PyThreadState *); +PyAPI_FUNC(void) _PyXIData_SetNotShareableError(PyThreadState *, const char *); +PyAPI_FUNC(void) _PyXIData_FormatNotShareableError( + PyThreadState *, + const char *, + ...); + +PyAPI_FUNC(_PyXIData_getdata_t) _PyXIData_Lookup( + PyThreadState *, + PyObject *); +PyAPI_FUNC(int) _PyObject_CheckXIData( + PyThreadState *, + PyObject *); + +PyAPI_FUNC(int) _PyObject_GetXIDataNoFallback( + PyThreadState *, + PyObject *, + _PyXIData_t *); +PyAPI_FUNC(int) _PyObject_GetXIData( + PyThreadState *, + PyObject *, + xidata_fallback_t, + _PyXIData_t *); + +// _PyObject_GetXIData() for bytes +typedef struct { + const char *bytes; + Py_ssize_t len; +} _PyBytes_data_t; +PyAPI_FUNC(int) _PyBytes_GetData(PyObject *, _PyBytes_data_t *); +PyAPI_FUNC(PyObject *) _PyBytes_FromData(_PyBytes_data_t *); +PyAPI_FUNC(PyObject *) _PyBytes_FromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyBytes_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); +PyAPI_FUNC(_PyBytes_data_t *) _PyBytes_GetXIDataWrapped( + PyThreadState *, + PyObject *, + size_t, + xid_newobjfunc, + _PyXIData_t *); + +// _PyObject_GetXIData() for pickle +PyAPI_DATA(PyObject *) _PyPickle_LoadFromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyPickle_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + +// _PyObject_GetXIData() for marshal +PyAPI_FUNC(PyObject *) _PyMarshal_ReadObjectFromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyMarshal_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + +// _PyObject_GetXIData() for code objects +PyAPI_FUNC(PyObject *) _PyCode_FromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t 
*); +PyAPI_FUNC(int) _PyCode_GetScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); +PyAPI_FUNC(int) _PyCode_GetPureScriptXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + +// _PyObject_GetXIData() for functions +PyAPI_FUNC(PyObject *) _PyFunction_FromXIData(_PyXIData_t *); +PyAPI_FUNC(int) _PyFunction_GetXIData( + PyThreadState *, + PyObject *, + _PyXIData_t *); + + +/* using cross-interpreter data */ + +PyAPI_FUNC(PyObject *) _PyXIData_NewObject(_PyXIData_t *); +PyAPI_FUNC(int) _PyXIData_Release(_PyXIData_t *); +PyAPI_FUNC(int) _PyXIData_ReleaseAndRawFree(_PyXIData_t *); /* cross-interpreter data registry */ @@ -171,6 +247,8 @@ PyAPI_FUNC(void) _PyXIData_Clear( PyInterpreterState *, _PyXIData_t *); /* runtime state & lifecycle */ /*****************************/ +typedef struct _xid_lookup_state _PyXIData_lookup_t; + typedef struct { // builtin types _PyXIData_lookup_t data_lookup; @@ -257,24 +335,9 @@ typedef struct _sharedexception { PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); -typedef struct xi_session _PyXI_session; -typedef struct _sharedns _PyXI_namespace; - -PyAPI_FUNC(void) _PyXI_FreeNamespace(_PyXI_namespace *ns); -PyAPI_FUNC(_PyXI_namespace *) _PyXI_NamespaceFromNames(PyObject *names); -PyAPI_FUNC(int) _PyXI_FillNamespaceFromDict( - _PyXI_namespace *ns, - PyObject *nsobj, - _PyXI_session *session); -PyAPI_FUNC(int) _PyXI_ApplyNamespace( - _PyXI_namespace *ns, - PyObject *nsobj, - PyObject *dflt); - - // A cross-interpreter session involves entering an interpreter -// (_PyXI_Enter()), doing some work with it, and finally exiting -// that interpreter (_PyXI_Exit()). +// with _PyXI_Enter(), doing some work with it, and finally exiting +// that interpreter with _PyXI_Exit(). // // At the boundaries of the session, both entering and exiting, // data may be exchanged between the previous interpreter and the @@ -282,39 +345,10 @@ PyAPI_FUNC(int) _PyXI_ApplyNamespace( // isolation between interpreters. 
This includes setting objects // in the target's __main__ module on the way in, and capturing // uncaught exceptions on the way out. -struct xi_session { - // Once a session has been entered, this is the tstate that was - // current before the session. If it is different from cur_tstate - // then we must have switched interpreters. Either way, this will - // be the current tstate once we exit the session. - PyThreadState *prev_tstate; - // Once a session has been entered, this is the current tstate. - // It must be current when the session exits. - PyThreadState *init_tstate; - // This is true if init_tstate needs cleanup during exit. - int own_init_tstate; +typedef struct xi_session _PyXI_session; - // This is true if, while entering the session, init_thread took - // "ownership" of the interpreter's __main__ module. This means - // it is the only thread that is allowed to run code there. - // (Caveat: for now, users may still run exec() against the - // __main__ module's dict, though that isn't advisable.) - int running; - // This is a cached reference to the __dict__ of the entered - // interpreter's __main__ module. It is looked up when at the - // beginning of the session as a convenience. - PyObject *main_ns; - - // This is set if the interpreter is entered and raised an exception - // that needs to be handled in some special way during exit. - _PyXI_errcode *error_override; - // This is set if exit captured an exception to propagate. 
- _PyXI_error *error; - - // -- pre-allocated memory -- - _PyXI_error _error; - _PyXI_errcode _error_override; -}; +PyAPI_FUNC(_PyXI_session *) _PyXI_NewSession(void); +PyAPI_FUNC(void) _PyXI_FreeSession(_PyXI_session *); PyAPI_FUNC(int) _PyXI_Enter( _PyXI_session *session, @@ -322,6 +356,8 @@ PyAPI_FUNC(int) _PyXI_Enter( PyObject *nsupdates); PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session); +PyAPI_FUNC(PyObject *) _PyXI_GetMainNamespace(_PyXI_session *); + PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session); PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session); diff --git a/Include/internal/pycore_crossinterp_data_registry.h b/Include/internal/pycore_crossinterp_data_registry.h index bbad4de7708..fbb4cad5cac 100644 --- a/Include/internal/pycore_crossinterp_data_registry.h +++ b/Include/internal/pycore_crossinterp_data_registry.h @@ -17,7 +17,7 @@ typedef struct _xid_regitem { /* This is NULL for builtin types. */ PyObject *weakref; size_t refcount; - xidatafunc getdata; + _PyXIData_getdata_t getdata; } _PyXIData_regitem_t; typedef struct { @@ -28,11 +28,11 @@ typedef struct { } _PyXIData_registry_t; PyAPI_FUNC(int) _PyXIData_RegisterClass( - _PyXIData_lookup_context_t *, + PyThreadState *, PyTypeObject *, - xidatafunc); + _PyXIData_getdata_t); PyAPI_FUNC(int) _PyXIData_UnregisterClass( - _PyXIData_lookup_context_t *, + PyThreadState *, PyTypeObject *); struct _xid_lookup_state { diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index b280633c9ef..1a265c59ff8 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -52,14 +52,18 @@ extern "C" { #ifdef Py_GIL_DISABLED # define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) # define _Py_Debug_Free_Threaded 1 +# define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) +# define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, 
tlbc_index) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 +# define _Py_Debug_code_object_co_tlbc 0 +# define _Py_Debug_interpreter_frame_tlbc_index 0 #endif typedef struct _Py_DebugOffsets { - char cookie[8]; + char cookie[8] _Py_NONSTRING; uint64_t version; uint64_t free_threaded; // Runtime state offset; @@ -109,6 +113,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplus; uint64_t owner; uint64_t stackpointer; + uint64_t tlbc_index; } interpreter_frame; // Code object offset; @@ -123,6 +128,7 @@ typedef struct _Py_DebugOffsets { uint64_t localsplusnames; uint64_t localspluskinds; uint64_t co_code_adaptive; + uint64_t co_tlbc; } code_object; // PyObject offset; @@ -265,6 +271,7 @@ typedef struct _Py_DebugOffsets { .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ .owner = offsetof(_PyInterpreterFrame, owner), \ .stackpointer = offsetof(_PyInterpreterFrame, stackpointer), \ + .tlbc_index = _Py_Debug_interpreter_frame_tlbc_index, \ }, \ .code_object = { \ .size = sizeof(PyCodeObject), \ @@ -277,6 +284,7 @@ typedef struct _Py_DebugOffsets { .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + .co_tlbc = _Py_Debug_code_object_co_tlbc, \ }, \ .pyobject = { \ .size = sizeof(PyObject), \ diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 754eb88a85c..25bb224921a 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -150,6 +150,8 @@ extern int _PyDict_Pop_KnownHash( Py_hash_t hash, PyObject **result); +extern void _PyDict_Clear_LockHeld(PyObject *op); + #ifdef Py_GIL_DISABLED PyAPI_FUNC(void) _PyDict_EnsureSharedOnRead(PyDictObject *mp); #endif diff --git a/Include/internal/pycore_emscripten_trampoline.h b/Include/internal/pycore_emscripten_trampoline.h index 7946eb5a74e..16916f1a8eb 100644 --- 
a/Include/internal/pycore_emscripten_trampoline.h +++ b/Include/internal/pycore_emscripten_trampoline.h @@ -37,17 +37,18 @@ _PyEM_TrampolineCall(PyCFunctionWithKeywords func, PyObject* kw); #define _PyCFunction_TrampolineCall(meth, self, args) \ - _PyEM_TrampolineCall( \ - (*(PyCFunctionWithKeywords)(void(*)(void))(meth)), (self), (args), NULL) + _PyEM_TrampolineCall(*_PyCFunctionWithKeywords_CAST(meth), (self), (args), NULL) #define _PyCFunctionWithKeywords_TrampolineCall(meth, self, args, kw) \ _PyEM_TrampolineCall((meth), (self), (args), (kw)) -#define descr_set_trampoline_call(set, obj, value, closure) \ - ((int)_PyEM_TrampolineCall((PyCFunctionWithKeywords)(set), (obj), (value), (PyObject*)(closure))) +#define descr_set_trampoline_call(set, obj, value, closure) \ + ((int)_PyEM_TrampolineCall(_PyCFunctionWithKeywords_CAST(set), (obj), \ + (value), (PyObject*)(closure))) -#define descr_get_trampoline_call(get, obj, closure) \ - _PyEM_TrampolineCall((PyCFunctionWithKeywords)(get), (obj), (PyObject*)(closure), NULL) +#define descr_get_trampoline_call(get, obj, closure) \ + _PyEM_TrampolineCall(_PyCFunctionWithKeywords_CAST(get), (obj), \ + (PyObject*)(closure), NULL) #else // defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) diff --git a/Include/internal/pycore_faulthandler.h b/Include/internal/pycore_faulthandler.h index 6dd7d8d7ca9..78cd657e6ae 100644 --- a/Include/internal/pycore_faulthandler.h +++ b/Include/internal/pycore_faulthandler.h @@ -56,6 +56,7 @@ struct _faulthandler_runtime_state { #ifdef MS_WINDOWS void *exc_handler; #endif + int c_stack; } fatal_error; struct { diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index 209252b2ddc..a30d52d49bd 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -35,6 +35,13 @@ PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_cod extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject 
*func, PyObject *type_params); + +/* See pycore_code.h for explanation about what "stateless" means. */ + +PyAPI_FUNC(int) +_PyFunction_VerifyStateless(PyThreadState *, PyObject *); + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 410a3734f1a..d896e870630 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -587,7 +587,9 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__and__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__anext__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__annotate__)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__annotate_func__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__annotations__)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__annotations_cache__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__args__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__await__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__bases__)); @@ -790,7 +792,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_done_callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_child)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_parent)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aggregate_class)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); @@ -807,7 +808,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ast)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(athrow)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(attribute)); - _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(authorizer_callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(autocommit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(backtick)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(base)); @@ -832,6 +832,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes_per_sep)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_exception)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_parameter_type)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements)); @@ -878,6 +879,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(contravariant)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(conversion)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cookie)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(copy)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(copyreg)); @@ -885,6 +887,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(d_parameter_type)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day)); @@ -935,6 +938,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(existing_file_name)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(exp)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(expression)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extend)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extra_tokens)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(facility)); @@ -964,6 +968,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format_spec)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(frame_buffer)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(from_param)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromlist)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromtimestamp)); @@ -1021,6 +1026,9 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intern)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intersection)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(interval)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(io)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_compress)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_raw)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_running)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_struct)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isatty)); @@ -1097,7 +1105,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(msg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mutex)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mycmp)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_arg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_fields)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_sequence_fields)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_unnamed_fields)); @@ -1105,7 +1112,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); @@ -1144,6 +1150,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parameter)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path)); @@ -1162,7 +1169,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(print_file_and_line)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_handler)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_routine)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(proto)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(protocol)); @@ -1268,7 +1274,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timetuple)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeunit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(top)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trace_callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(traceback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trailers)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(translate)); @@ -1305,6 +1310,7 @@ 
_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(year)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zdict)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zstd_dict)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[0]); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[1]); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[2]); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index cadbc01b01d..a06d7495bab 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -78,7 +78,9 @@ struct _Py_global_strings { STRUCT_FOR_ID(__and__) STRUCT_FOR_ID(__anext__) STRUCT_FOR_ID(__annotate__) + STRUCT_FOR_ID(__annotate_func__) STRUCT_FOR_ID(__annotations__) + STRUCT_FOR_ID(__annotations_cache__) STRUCT_FOR_ID(__args__) STRUCT_FOR_ID(__await__) STRUCT_FOR_ID(__bases__) @@ -281,7 +283,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(add_done_callback) STRUCT_FOR_ID(after_in_child) STRUCT_FOR_ID(after_in_parent) - STRUCT_FOR_ID(aggregate_class) STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) @@ -298,7 +299,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(ast) STRUCT_FOR_ID(athrow) STRUCT_FOR_ID(attribute) - STRUCT_FOR_ID(authorizer_callback) STRUCT_FOR_ID(autocommit) STRUCT_FOR_ID(backtick) STRUCT_FOR_ID(base) @@ -323,6 +323,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(bytes_per_sep) STRUCT_FOR_ID(c_call) STRUCT_FOR_ID(c_exception) + STRUCT_FOR_ID(c_parameter_type) STRUCT_FOR_ID(c_return) STRUCT_FOR_ID(cached_datetime_module) STRUCT_FOR_ID(cached_statements) @@ -369,6 +370,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(consts) STRUCT_FOR_ID(context) STRUCT_FOR_ID(contravariant) + STRUCT_FOR_ID(conversion) STRUCT_FOR_ID(cookie) STRUCT_FOR_ID(copy) STRUCT_FOR_ID(copyreg) @@ -376,6 +378,7 
@@ struct _Py_global_strings { STRUCT_FOR_ID(count) STRUCT_FOR_ID(covariant) STRUCT_FOR_ID(cwd) + STRUCT_FOR_ID(d_parameter_type) STRUCT_FOR_ID(data) STRUCT_FOR_ID(database) STRUCT_FOR_ID(day) @@ -426,6 +429,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(exception) STRUCT_FOR_ID(existing_file_name) STRUCT_FOR_ID(exp) + STRUCT_FOR_ID(expression) STRUCT_FOR_ID(extend) STRUCT_FOR_ID(extra_tokens) STRUCT_FOR_ID(facility) @@ -455,6 +459,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(follow_symlinks) STRUCT_FOR_ID(format) STRUCT_FOR_ID(format_spec) + STRUCT_FOR_ID(frame_buffer) STRUCT_FOR_ID(from_param) STRUCT_FOR_ID(fromlist) STRUCT_FOR_ID(fromtimestamp) @@ -512,6 +517,9 @@ struct _Py_global_strings { STRUCT_FOR_ID(intern) STRUCT_FOR_ID(intersection) STRUCT_FOR_ID(interval) + STRUCT_FOR_ID(io) + STRUCT_FOR_ID(is_compress) + STRUCT_FOR_ID(is_raw) STRUCT_FOR_ID(is_running) STRUCT_FOR_ID(is_struct) STRUCT_FOR_ID(isatty) @@ -588,7 +596,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(msg) STRUCT_FOR_ID(mutex) STRUCT_FOR_ID(mycmp) - STRUCT_FOR_ID(n_arg) STRUCT_FOR_ID(n_fields) STRUCT_FOR_ID(n_sequence_fields) STRUCT_FOR_ID(n_unnamed_fields) @@ -596,7 +603,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(name_from) STRUCT_FOR_ID(namespace_separator) STRUCT_FOR_ID(namespaces) - STRUCT_FOR_ID(narg) STRUCT_FOR_ID(ndigits) STRUCT_FOR_ID(nested) STRUCT_FOR_ID(new_file_name) @@ -635,6 +641,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(overlapped) STRUCT_FOR_ID(owner) STRUCT_FOR_ID(pages) + STRUCT_FOR_ID(parameter) STRUCT_FOR_ID(parent) STRUCT_FOR_ID(password) STRUCT_FOR_ID(path) @@ -653,7 +660,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(print_file_and_line) STRUCT_FOR_ID(priority) STRUCT_FOR_ID(progress) - STRUCT_FOR_ID(progress_handler) STRUCT_FOR_ID(progress_routine) STRUCT_FOR_ID(proto) STRUCT_FOR_ID(protocol) @@ -759,7 +765,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(timetuple) STRUCT_FOR_ID(timeunit) STRUCT_FOR_ID(top) - STRUCT_FOR_ID(trace_callback) 
STRUCT_FOR_ID(traceback) STRUCT_FOR_ID(trailers) STRUCT_FOR_ID(translate) @@ -796,6 +801,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(write_through) STRUCT_FOR_ID(year) STRUCT_FOR_ID(zdict) + STRUCT_FOR_ID(zstd_dict) } identifiers; struct { PyASCIIObject _ascii; diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index 3acafd02bbd..13fbff4eb65 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -63,6 +63,7 @@ extern void _PyImport_SetDLOpenFlags(PyInterpreterState *interp, int new_val); extern PyObject * _PyImport_InitModules(PyInterpreterState *interp); extern PyObject * _PyImport_GetModules(PyInterpreterState *interp); +extern PyObject * _PyImport_GetModulesRef(PyInterpreterState *interp); extern void _PyImport_ClearModules(PyInterpreterState *interp); extern void _PyImport_ClearModulesByIndex(PyInterpreterState *interp); diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h index 525a16f6b97..3ba9229cc21 100644 --- a/Include/internal/pycore_importdl.h +++ b/Include/internal/pycore_importdl.h @@ -107,7 +107,7 @@ extern int _PyImport_RunModInitFunc( #include typedef FARPROC dl_funcptr; -#ifdef _DEBUG +#ifdef Py_DEBUG # define PYD_DEBUG_SUFFIX "_d" #else # define PYD_DEBUG_SUFFIX "" diff --git a/Include/internal/pycore_instruction_sequence.h b/Include/internal/pycore_instruction_sequence.h index 099f2fd1240..b5c92773537 100644 --- a/Include/internal/pycore_instruction_sequence.h +++ b/Include/internal/pycore_instruction_sequence.h @@ -45,6 +45,9 @@ typedef struct instruction_sequence { /* PyList of instruction sequences of nested functions */ PyObject *s_nested; + + /* Code for creating annotations, spliced into the main sequence later */ + struct instruction_sequence *s_annotations_code; } _PyInstructionSequence; typedef struct { @@ -66,6 +69,8 @@ _PyJumpTargetLabel _PyInstructionSequence_NewLabel(_PyInstructionSequence *seq); int 
_PyInstructionSequence_ApplyLabelMap(_PyInstructionSequence *seq); int _PyInstructionSequence_InsertInstruction(_PyInstructionSequence *seq, int pos, int opcode, int oparg, _Py_SourceLocation loc); +int _PyInstructionSequence_SetAnnotationsCode(_PyInstructionSequence *seq, + _PyInstructionSequence *annotations); int _PyInstructionSequence_AddNested(_PyInstructionSequence *seq, _PyInstructionSequence *nested); void PyInstructionSequence_Fini(_PyInstructionSequence *seq); diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 573b56a57e1..c3e6c77405b 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -245,6 +245,16 @@ struct _gc_runtime_state { /* True if gc.freeze() has been used. */ int freeze_active; + + /* Memory usage of the process (RSS + swap) after last GC. */ + Py_ssize_t last_mem; + + /* This accumulates the new object count whenever collection is deferred + due to the RSS increase condition not being met. Reset on collection. */ + Py_ssize_t deferred_count; + + /* Mutex held for gc_should_collect_mem_usage(). */ + PyMutex mutex; #endif }; @@ -754,6 +764,12 @@ struct _is { * and should be placed at the beginning. */ struct _ceval_state ceval; + /* This structure is carefully allocated so that it's correctly aligned + * to avoid undefined behaviors during LOAD and STORE. The '_malloced' + * field stores the allocated pointer address that will later be freed. 
+ */ + void *_malloced; + PyInterpreterState *next; int64_t id; @@ -917,6 +933,8 @@ struct _is { PyObject *common_consts[NUM_COMMON_CONSTANTS]; bool jit; struct _PyExecutorObject *executor_list_head; + struct _PyExecutorObject *executor_deletion_list_head; + int executor_deletion_list_remaining_capacity; size_t trace_run_counter; _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; @@ -935,11 +953,6 @@ struct _is { Py_ssize_t _interactive_src_count; - /* the initial PyInterpreterState.threads.head */ - _PyThreadStateImpl _initial_thread; - // _initial_thread should be the last field of PyInterpreterState. - // See https://github.com/python/cpython/issues/127117. - #if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) uint64_t next_stackref; _Py_hashtable_t *open_stackrefs_table; @@ -947,6 +960,11 @@ struct _is { _Py_hashtable_t *closed_stackrefs_table; # endif #endif + + /* the initial PyInterpreterState.threads.head */ + _PyThreadStateImpl _initial_thread; + // _initial_thread should be the last field of PyInterpreterState. + // See https://github.com/python/cpython/issues/127117. 
}; diff --git a/Include/internal/pycore_interpframe.h b/Include/internal/pycore_interpframe.h index 1d373d55ab2..2ee3696317c 100644 --- a/Include/internal/pycore_interpframe.h +++ b/Include/internal/pycore_interpframe.h @@ -18,6 +18,7 @@ extern "C" { ((int)((IF)->instr_ptr - _PyFrame_GetBytecode((IF)))) static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { + assert(!PyStackRef_IsNull(f->f_executable)); PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); assert(PyCode_Check(executable)); return (PyCodeObject *)executable; @@ -47,13 +48,13 @@ static inline _PyStackRef *_PyFrame_Stackbase(_PyInterpreterFrame *f) { } static inline _PyStackRef _PyFrame_StackPeek(_PyInterpreterFrame *f) { - assert(f->stackpointer > f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus); + assert(f->stackpointer > _PyFrame_Stackbase(f)); assert(!PyStackRef_IsNull(f->stackpointer[-1])); return f->stackpointer[-1]; } static inline _PyStackRef _PyFrame_StackPop(_PyInterpreterFrame *f) { - assert(f->stackpointer > f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus); + assert(f->stackpointer > _PyFrame_Stackbase(f)); f->stackpointer--; return *f->stackpointer; } diff --git a/Include/internal/pycore_interpolation.h b/Include/internal/pycore_interpolation.h new file mode 100644 index 00000000000..dd610c1609c --- /dev/null +++ b/Include/internal/pycore_interpolation.h @@ -0,0 +1,26 @@ +#ifndef Py_INTERNAL_INTERPOLATION_H +#define Py_INTERNAL_INTERPOLATION_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyTypeObject _PyInterpolation_Type; + +#define _PyInterpolation_CheckExact(op) Py_IS_TYPE((op), &_PyInterpolation_Type) + +PyAPI_FUNC(PyObject *) _PyInterpolation_Build(PyObject *value, PyObject *str, + int conversion, PyObject *format_spec); + +extern PyStatus _PyInterpolation_InitTypes(PyInterpreterState *interp); +extern PyObject *_PyInterpolation_GetValueRef(PyObject 
*interpolation); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 5c66d76a0d6..ffbcebdb7df 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -13,10 +13,13 @@ extern "C" { #endif PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); +PyAPI_FUNC(PyObject) *_PyList_SliceSubscript(PyObject*, PyObject*); extern void _PyList_DebugMallocStats(FILE *out); // _PyList_GetItemRef should be used only when the object is known as a list // because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); + + #ifdef Py_GIL_DISABLED // Returns -1 in case of races with other threads. extern int _PyList_GetItemRefNoLock(PyListObject *, Py_ssize_t, _PyStackRef *); diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index ed6c4353167..3196d1b8208 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -158,6 +158,11 @@ PyAPI_FUNC(int) _PyLong_UnsignedLongLong_Converter(PyObject *, void *); // Export for '_testclinic' shared extension (Argument Clinic code) PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt8_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt16_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt32_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyLong_UInt64_Converter(PyObject *, void *); + /* Long value tag bits: * 0-1: Sign bits value = (1-sign), ie. negative=2, positive=0, zero=1. 
* 2: Set to 1 for the small ints diff --git a/Include/internal/pycore_magic_number.h b/Include/internal/pycore_magic_number.h index 3fa2b714cb6..3fd56c346b9 100644 --- a/Include/internal/pycore_magic_number.h +++ b/Include/internal/pycore_magic_number.h @@ -274,8 +274,13 @@ Known values: Python 3.14a6 3619 (Renumber RESUME opcode from 149 to 128) Python 3.14a6 3620 (Optimize bytecode for all/any/tuple called on a genexp) Python 3.14a7 3621 (Optimize LOAD_FAST opcodes into LOAD_FAST_BORROW) + Python 3.14a7 3622 (Store annotations in different class dict keys) + Python 3.14a7 3623 (Add BUILD_INTERPOLATION & BUILD_TEMPLATE opcodes) + Python 3.14b1 3624 (Don't optimize LOAD_FAST when local is killed by DELETE_FAST) + Python 3.15a0 3650 (Initial version) + Python 3.15a1 3651 (Simplify LOAD_CONST) - Python 3.15 will start with 3650 + Python 3.16 will start with 3700 Please don't copy-paste the same pre-release tag for new entries above!!! You should always use the *upcoming* tag. For example, if 3.12a6 came out @@ -286,7 +291,7 @@ PC/launcher.c must also be updated. */ -#define PYC_MAGIC_NUMBER 3621 +#define PYC_MAGIC_NUMBER 3651 /* This is equivalent to converting PYC_MAGIC_NUMBER to 2 bytes (little-endian) and then appending b'\r\n'. 
*/ #define PYC_MAGIC_NUMBER_TOKEN \ diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h index 9bb282a13a9..b170d7bce70 100644 --- a/Include/internal/pycore_moduleobject.h +++ b/Include/internal/pycore_moduleobject.h @@ -47,6 +47,12 @@ static inline PyObject* _PyModule_GetDict(PyObject *mod) { return dict; // borrowed reference } +extern PyObject * _PyModule_GetFilenameObject(PyObject *); +extern Py_ssize_t _PyModule_GetFilenameUTF8( + PyObject *module, + char *buffer, + Py_ssize_t maxlen); + PyObject* _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress); PyObject* _Py_module_getattro(PyObject *m, PyObject *name); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index e5034ff4dcc..b7e162c8abc 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -950,7 +950,7 @@ extern int _PyObject_IsInstanceDictEmpty(PyObject *); // Export for 'math' shared extension PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *); -PyAPI_FUNC(PyObject*) _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or_null); +PyAPI_FUNC(int) _PyObject_LookupSpecialMethod(PyObject *attr, _PyStackRef *method_and_self); // Calls the method named `attr` on `self`, but does not set an exception if // the attribute does not exist. 
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 521f7a92cf0..39d6a912a54 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -20,6 +20,7 @@ extern "C" { #define IS_PSEUDO_INSTR(OP) ( \ ((OP) == LOAD_CLOSURE) || \ ((OP) == STORE_FAST_MAYBE_NULL) || \ + ((OP) == ANNOTATIONS_PLACEHOLDER) || \ ((OP) == JUMP) || \ ((OP) == JUMP_NO_INTERRUPT) || \ ((OP) == JUMP_IF_FALSE) || \ @@ -35,6 +36,8 @@ extern int _PyOpcode_num_popped(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA int _PyOpcode_num_popped(int opcode, int oparg) { switch(opcode) { + case ANNOTATIONS_PLACEHOLDER: + return 0; case BINARY_OP: return 2; case BINARY_OP_ADD_FLOAT: @@ -57,6 +60,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 2; case BINARY_OP_SUBSCR_LIST_INT: return 2; + case BINARY_OP_SUBSCR_LIST_SLICE: + return 2; case BINARY_OP_SUBSCR_STR_INT: return 2; case BINARY_OP_SUBSCR_TUPLE_INT: @@ -67,6 +72,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 2; case BINARY_SLICE: return 3; + case BUILD_INTERPOLATION: + return 2 + (oparg & 1); case BUILD_LIST: return oparg; case BUILD_MAP: @@ -77,6 +84,8 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return oparg; case BUILD_STRING: return oparg; + case BUILD_TEMPLATE: + return 2; case BUILD_TUPLE: return oparg; case CACHE: @@ -104,7 +113,7 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case CALL_INTRINSIC_2: return 2; case CALL_ISINSTANCE: - return 2 + oparg; + return 4; case CALL_KW: return 3 + oparg; case CALL_KW_BOUND_METHOD: @@ -114,7 +123,7 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case CALL_KW_PY: return 3 + oparg; case CALL_LEN: - return 2 + oparg; + return 3; case CALL_LIST_APPEND: return 3; case CALL_METHOD_DESCRIPTOR_FAST: @@ -325,10 +334,6 @@ int _PyOpcode_num_popped(int opcode, int oparg) { return 0; case LOAD_CONST: return 0; - case LOAD_CONST_IMMORTAL: - return 0; - case 
LOAD_CONST_MORTAL: - return 0; case LOAD_DEREF: return 0; case LOAD_FAST: @@ -514,6 +519,8 @@ extern int _PyOpcode_num_pushed(int opcode, int oparg); #ifdef NEED_OPCODE_METADATA int _PyOpcode_num_pushed(int opcode, int oparg) { switch(opcode) { + case ANNOTATIONS_PLACEHOLDER: + return 0; case BINARY_OP: return 1; case BINARY_OP_ADD_FLOAT: @@ -536,6 +543,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 0; case BINARY_OP_SUBSCR_LIST_INT: return 1; + case BINARY_OP_SUBSCR_LIST_SLICE: + return 1; case BINARY_OP_SUBSCR_STR_INT: return 1; case BINARY_OP_SUBSCR_TUPLE_INT: @@ -546,6 +555,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case BINARY_SLICE: return 1; + case BUILD_INTERPOLATION: + return 1; case BUILD_LIST: return 1; case BUILD_MAP: @@ -556,6 +567,8 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case BUILD_STRING: return 1; + case BUILD_TEMPLATE: + return 1; case BUILD_TUPLE: return 1; case CACHE: @@ -804,10 +817,6 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { return 1; case LOAD_CONST: return 1; - case LOAD_CONST_IMMORTAL: - return 1; - case LOAD_CONST_MORTAL: - return 1; case LOAD_DEREF: return 1; case LOAD_FAST: @@ -1004,7 +1013,7 @@ enum InstructionFormat { }; #define IS_VALID_OPCODE(OP) \ - (((OP) >= 0) && ((OP) < 266) && \ + (((OP) >= 0) && ((OP) < 267) && \ (_PyOpcode_opcode_metadata[(OP)].valid_entry)) #define HAS_ARG_FLAG (1) @@ -1058,9 +1067,9 @@ struct opcode_metadata { uint16_t flags; }; -extern const struct opcode_metadata _PyOpcode_opcode_metadata[266]; +extern const struct opcode_metadata _PyOpcode_opcode_metadata[267]; #ifdef NEED_OPCODE_METADATA -const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { +const struct opcode_metadata _PyOpcode_opcode_metadata[267] = { [BINARY_OP] = { true, INSTR_FMT_IBC0000, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_INT] = 
{ true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1072,16 +1081,19 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_OP_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG }, [BINARY_OP_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_SUBSCR_LIST_SLICE] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [BINARY_OP_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000, HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_INTERPOLATION] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [BUILD_MAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_SET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BUILD_SLICE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, - [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [BUILD_SLICE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_TEMPLATE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BUILD_TUPLE] = { true, INSTR_FMT_IB, 
HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, [CACHE] = { true, INSTR_FMT_IX, 0 }, [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1095,13 +1107,13 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_INTRINSIC_1] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_ISINSTANCE] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_BOUND_METHOD] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LEN] = { true, INSTR_FMT_IXC00, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | 
HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1175,8 +1187,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, 0 }, - [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_JIT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, @@ -1185,8 +1197,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, [LIST_EXTEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, - [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, 
HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ATTR_CLASS_WITH_METACLASS_CHECK] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG }, [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, @@ -1196,13 +1208,11 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_COMMON_CONSTANT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, - [LOAD_CONST_IMMORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, - [LOAD_CONST_MORTAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, @@ -1218,7 
+1228,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SMALL_INT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, - [LOAD_SPECIAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_SPECIAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SUPER_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SUPER_ATTR_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [LOAD_SUPER_ATTR_METHOD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, @@ -1234,8 +1244,8 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, [POP_ITER] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, - [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, + [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ESCAPES_FLAG }, [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, [POP_TOP] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG | HAS_PURE_FLAG }, [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, @@ -1272,7 +1282,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, 
[TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, - [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1280,11 +1290,12 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, - [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [ANNOTATIONS_PLACEHOLDER] = { true, -1, HAS_PURE_FLAG }, [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_IF_FALSE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [JUMP_IF_TRUE] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, @@ -1298,7 +1309,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { }; #endif -#define MAX_UOP_PER_EXPANSION 9 +#define MAX_UOP_PER_EXPANSION 10 struct 
opcode_macro_expansion { int nuops; struct { int16_t uop; int8_t size; int8_t offset; } uops[MAX_UOP_PER_EXPANSION]; @@ -1319,42 +1330,45 @@ _PyOpcode_macro_expansion[256] = { [BINARY_OP_SUBSCR_DICT] = { .nuops = 2, .uops = { { _GUARD_NOS_DICT, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_DICT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBSCR_GETITEM] = { .nuops = 4, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 5 }, { _BINARY_OP_SUBSCR_CHECK_FUNC, OPARG_SIMPLE, 5 }, { _BINARY_OP_SUBSCR_INIT_CALL, OPARG_SIMPLE, 5 }, { _PUSH_FRAME, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBSCR_LIST_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_LIST_INT, OPARG_SIMPLE, 5 } } }, + [BINARY_OP_SUBSCR_LIST_SLICE] = { .nuops = 3, .uops = { { _GUARD_TOS_SLICE, OPARG_SIMPLE, 0 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_LIST_SLICE, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBSCR_STR_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_UNICODE, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_STR_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBSCR_TUPLE_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_TUPLE, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBSCR_TUPLE_INT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 3, .uops = { { _GUARD_TOS_FLOAT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_FLOAT, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, OPARG_SIMPLE, 5 } } }, [BINARY_OP_SUBTRACT_INT] = { .nuops = 3, .uops = { { _GUARD_TOS_INT, OPARG_SIMPLE, 0 }, { _GUARD_NOS_INT, OPARG_SIMPLE, 0 }, { _BINARY_OP_SUBTRACT_INT, OPARG_SIMPLE, 5 } } }, [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, OPARG_SIMPLE, 0 } } }, + [BUILD_INTERPOLATION] = { .nuops = 1, .uops = { { _BUILD_INTERPOLATION, OPARG_SIMPLE, 0 } } }, [BUILD_LIST] = { .nuops = 1, .uops = { { _BUILD_LIST, OPARG_SIMPLE, 0 } } }, [BUILD_MAP] = { .nuops = 1, .uops = { { _BUILD_MAP, OPARG_SIMPLE, 0 } } }, [BUILD_SET] = { .nuops = 1, 
.uops = { { _BUILD_SET, OPARG_SIMPLE, 0 } } }, [BUILD_SLICE] = { .nuops = 1, .uops = { { _BUILD_SLICE, OPARG_SIMPLE, 0 } } }, [BUILD_STRING] = { .nuops = 1, .uops = { { _BUILD_STRING, OPARG_SIMPLE, 0 } } }, + [BUILD_TEMPLATE] = { .nuops = 1, .uops = { { _BUILD_TEMPLATE, OPARG_SIMPLE, 0 } } }, [BUILD_TUPLE] = { .nuops = 1, .uops = { { _BUILD_TUPLE, OPARG_SIMPLE, 0 } } }, [CALL_ALLOC_AND_ENTER_INIT] = { .nuops = 4, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_AND_ALLOCATE_OBJECT, 2, 1 }, { _CREATE_INIT_FRAME, OPARG_SIMPLE, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 9, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_BOUND_METHOD_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 10, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, + [CALL_BOUND_METHOD_GENERAL] = { .nuops = 7, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION, 2, 1 
}, { _EXPAND_METHOD, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_CLASS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_CLASS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_FAST] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_BUILTIN_O] = { .nuops = 2, .uops = { { _CALL_BUILTIN_O, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_1, OPARG_SIMPLE, 0 } } }, [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_2, OPARG_SIMPLE, 0 } } }, - [CALL_ISINSTANCE] = { .nuops = 1, .uops = { { _CALL_ISINSTANCE, OPARG_SIMPLE, 3 } } }, + [CALL_ISINSTANCE] = { .nuops = 3, .uops = { { _GUARD_THIRD_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_ISINSTANCE, OPARG_SIMPLE, 3 }, { _CALL_ISINSTANCE, OPARG_SIMPLE, 3 } } }, [CALL_KW_BOUND_METHOD] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_METHOD_VERSION_KW, 2, 1 }, { _EXPAND_METHOD_KW, OPARG_SIMPLE, 3 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, [CALL_KW_NON_PY] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE_KW, OPARG_SIMPLE, 3 }, { _CALL_KW_NON_PY, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_KW_PY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION_KW, 2, 1 }, { _PY_FRAME_KW, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_LEN] = { .nuops = 1, .uops = { { _CALL_LEN, OPARG_SIMPLE, 3 } } }, - 
[CALL_LIST_APPEND] = { .nuops = 1, .uops = { { _CALL_LIST_APPEND, OPARG_SIMPLE, 3 } } }, + [CALL_LEN] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_LEN, OPARG_SIMPLE, 3 }, { _CALL_LEN, OPARG_SIMPLE, 3 } } }, + [CALL_LIST_APPEND] = { .nuops = 4, .uops = { { _GUARD_CALLABLE_LIST_APPEND, OPARG_SIMPLE, 3 }, { _GUARD_NOS_NOT_NULL, OPARG_SIMPLE, 3 }, { _GUARD_NOS_LIST, OPARG_SIMPLE, 3 }, { _CALL_LIST_APPEND, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_FAST] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_NOARGS, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_METHOD_DESCRIPTOR_O] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_O, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, [CALL_NON_PY_GENERAL] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE, OPARG_SIMPLE, 3 }, { _CALL_NON_PY_GENERAL, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_PY_EXACT_ARGS] = { .nuops = 7, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_PY_GENERAL] = { .nuops = 5, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, - [CALL_STR_1] = { .nuops = 2, .uops = { { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - 
[CALL_TUPLE_1] = { .nuops = 2, .uops = { { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, - [CALL_TYPE_1] = { .nuops = 1, .uops = { { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, + [CALL_PY_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_FUNCTION_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _CHECK_STACK_SPACE, OPARG_SIMPLE, 3 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _INIT_CALL_PY_EXACT_ARGS, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, + [CALL_PY_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, OPARG_SIMPLE, 1 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _CHECK_RECURSION_REMAINING, OPARG_SIMPLE, 3 }, { _PY_FRAME_GENERAL, OPARG_SIMPLE, 3 }, { _SAVE_RETURN_OFFSET, OPARG_SAVE_RETURN_OFFSET, 3 }, { _PUSH_FRAME, OPARG_SIMPLE, 3 } } }, + [CALL_STR_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_STR_1, OPARG_SIMPLE, 3 }, { _CALL_STR_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_TUPLE_1] = { .nuops = 4, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TUPLE_1, OPARG_SIMPLE, 3 }, { _CALL_TUPLE_1, OPARG_SIMPLE, 3 }, { _CHECK_PERIODIC, OPARG_SIMPLE, 3 } } }, + [CALL_TYPE_1] = { .nuops = 3, .uops = { { _GUARD_NOS_NULL, OPARG_SIMPLE, 3 }, { _GUARD_CALLABLE_TYPE_1, OPARG_SIMPLE, 3 }, { _CALL_TYPE_1, OPARG_SIMPLE, 3 } } }, [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, OPARG_SIMPLE, 0 } } }, [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { _CHECK_EXC_MATCH, OPARG_SIMPLE, 0 } } }, [COMPARE_OP] = { .nuops = 1, .uops = { { _COMPARE_OP, OPARG_SIMPLE, 0 } } }, @@ -1411,8 +1425,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_ATTR_WITH_HINT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_WITH_HINT, 1, 3 }, { _PUSH_NULL_CONDITIONAL, OPARG_SIMPLE, 9 } } }, [LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { _LOAD_BUILD_CLASS, 
OPARG_SIMPLE, 0 } } }, [LOAD_COMMON_CONSTANT] = { .nuops = 1, .uops = { { _LOAD_COMMON_CONSTANT, OPARG_SIMPLE, 0 } } }, - [LOAD_CONST_IMMORTAL] = { .nuops = 1, .uops = { { _LOAD_CONST_IMMORTAL, OPARG_SIMPLE, 0 } } }, - [LOAD_CONST_MORTAL] = { .nuops = 1, .uops = { { _LOAD_CONST_MORTAL, OPARG_SIMPLE, 0 } } }, + [LOAD_CONST] = { .nuops = 1, .uops = { { _LOAD_CONST, OPARG_SIMPLE, 0 } } }, [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, OPARG_SIMPLE, 0 } } }, [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, OPARG_SIMPLE, 0 } } }, [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, OPARG_SIMPLE, 0 } } }, @@ -1427,7 +1440,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, OPARG_SIMPLE, 0 } } }, [LOAD_NAME] = { .nuops = 1, .uops = { { _LOAD_NAME, OPARG_SIMPLE, 0 } } }, [LOAD_SMALL_INT] = { .nuops = 1, .uops = { { _LOAD_SMALL_INT, OPARG_SIMPLE, 0 } } }, - [LOAD_SPECIAL] = { .nuops = 1, .uops = { { _LOAD_SPECIAL, OPARG_SIMPLE, 0 } } }, + [LOAD_SPECIAL] = { .nuops = 2, .uops = { { _INSERT_NULL, OPARG_SIMPLE, 0 }, { _LOAD_SPECIAL, OPARG_SIMPLE, 0 } } }, [LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { _LOAD_SUPER_ATTR_ATTR, OPARG_SIMPLE, 1 } } }, [LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { _LOAD_SUPER_ATTR_METHOD, OPARG_SIMPLE, 1 } } }, [MAKE_CELL] = { .nuops = 1, .uops = { { _MAKE_CELL, OPARG_SIMPLE, 0 } } }, @@ -1491,9 +1504,10 @@ _PyOpcode_macro_expansion[256] = { }; #endif // NEED_OPCODE_METADATA -extern const char *_PyOpcode_OpName[266]; +extern const char *_PyOpcode_OpName[267]; #ifdef NEED_OPCODE_METADATA -const char *_PyOpcode_OpName[266] = { +const char *_PyOpcode_OpName[267] = { + [ANNOTATIONS_PLACEHOLDER] = "ANNOTATIONS_PLACEHOLDER", [BINARY_OP] = "BINARY_OP", [BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT", [BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT", @@ -1505,16 +1519,19 @@ const char *_PyOpcode_OpName[266] = { [BINARY_OP_SUBSCR_DICT] = "BINARY_OP_SUBSCR_DICT", 
[BINARY_OP_SUBSCR_GETITEM] = "BINARY_OP_SUBSCR_GETITEM", [BINARY_OP_SUBSCR_LIST_INT] = "BINARY_OP_SUBSCR_LIST_INT", + [BINARY_OP_SUBSCR_LIST_SLICE] = "BINARY_OP_SUBSCR_LIST_SLICE", [BINARY_OP_SUBSCR_STR_INT] = "BINARY_OP_SUBSCR_STR_INT", [BINARY_OP_SUBSCR_TUPLE_INT] = "BINARY_OP_SUBSCR_TUPLE_INT", [BINARY_OP_SUBTRACT_FLOAT] = "BINARY_OP_SUBTRACT_FLOAT", [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", [BINARY_SLICE] = "BINARY_SLICE", + [BUILD_INTERPOLATION] = "BUILD_INTERPOLATION", [BUILD_LIST] = "BUILD_LIST", [BUILD_MAP] = "BUILD_MAP", [BUILD_SET] = "BUILD_SET", [BUILD_SLICE] = "BUILD_SLICE", [BUILD_STRING] = "BUILD_STRING", + [BUILD_TEMPLATE] = "BUILD_TEMPLATE", [BUILD_TUPLE] = "BUILD_TUPLE", [CACHE] = "CACHE", [CALL] = "CALL", @@ -1639,8 +1656,6 @@ const char *_PyOpcode_OpName[266] = { [LOAD_CLOSURE] = "LOAD_CLOSURE", [LOAD_COMMON_CONSTANT] = "LOAD_COMMON_CONSTANT", [LOAD_CONST] = "LOAD_CONST", - [LOAD_CONST_IMMORTAL] = "LOAD_CONST_IMMORTAL", - [LOAD_CONST_MORTAL] = "LOAD_CONST_MORTAL", [LOAD_DEREF] = "LOAD_DEREF", [LOAD_FAST] = "LOAD_FAST", [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", @@ -1759,6 +1774,37 @@ const uint8_t _PyOpcode_Caches[256] = { extern const uint8_t _PyOpcode_Deopt[256]; #ifdef NEED_OPCODE_METADATA const uint8_t _PyOpcode_Deopt[256] = { + [121] = 121, + [122] = 122, + [123] = 123, + [124] = 124, + [125] = 125, + [126] = 126, + [127] = 127, + [210] = 210, + [211] = 211, + [212] = 212, + [213] = 213, + [214] = 214, + [215] = 215, + [216] = 216, + [217] = 217, + [218] = 218, + [219] = 219, + [220] = 220, + [221] = 221, + [222] = 222, + [223] = 223, + [224] = 224, + [225] = 225, + [226] = 226, + [227] = 227, + [228] = 228, + [229] = 229, + [230] = 230, + [231] = 231, + [232] = 232, + [233] = 233, [BINARY_OP] = BINARY_OP, [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, @@ -1770,16 +1816,19 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_OP_SUBSCR_DICT] = BINARY_OP, [BINARY_OP_SUBSCR_GETITEM] = BINARY_OP, 
[BINARY_OP_SUBSCR_LIST_INT] = BINARY_OP, + [BINARY_OP_SUBSCR_LIST_SLICE] = BINARY_OP, [BINARY_OP_SUBSCR_STR_INT] = BINARY_OP, [BINARY_OP_SUBSCR_TUPLE_INT] = BINARY_OP, [BINARY_OP_SUBTRACT_FLOAT] = BINARY_OP, [BINARY_OP_SUBTRACT_INT] = BINARY_OP, [BINARY_SLICE] = BINARY_SLICE, + [BUILD_INTERPOLATION] = BUILD_INTERPOLATION, [BUILD_LIST] = BUILD_LIST, [BUILD_MAP] = BUILD_MAP, [BUILD_SET] = BUILD_SET, [BUILD_SLICE] = BUILD_SLICE, [BUILD_STRING] = BUILD_STRING, + [BUILD_TEMPLATE] = BUILD_TEMPLATE, [BUILD_TUPLE] = BUILD_TUPLE, [CACHE] = CACHE, [CALL] = CALL, @@ -1899,8 +1948,6 @@ const uint8_t _PyOpcode_Deopt[256] = { [LOAD_BUILD_CLASS] = LOAD_BUILD_CLASS, [LOAD_COMMON_CONSTANT] = LOAD_COMMON_CONSTANT, [LOAD_CONST] = LOAD_CONST, - [LOAD_CONST_IMMORTAL] = LOAD_CONST, - [LOAD_CONST_MORTAL] = LOAD_CONST, [LOAD_DEREF] = LOAD_DEREF, [LOAD_FAST] = LOAD_FAST, [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, @@ -1988,8 +2035,6 @@ const uint8_t _PyOpcode_Deopt[256] = { #endif // NEED_OPCODE_METADATA #define EXTRA_CASES \ - case 119: \ - case 120: \ case 121: \ case 122: \ case 123: \ @@ -1997,6 +2042,7 @@ const uint8_t _PyOpcode_Deopt[256] = { case 125: \ case 126: \ case 127: \ + case 210: \ case 211: \ case 212: \ case 213: \ @@ -2025,11 +2071,12 @@ struct pseudo_targets { uint8_t as_sequence; uint8_t targets[4]; }; -extern const struct pseudo_targets _PyOpcode_PseudoTargets[10]; +extern const struct pseudo_targets _PyOpcode_PseudoTargets[11]; #ifdef NEED_OPCODE_METADATA -const struct pseudo_targets _PyOpcode_PseudoTargets[10] = { +const struct pseudo_targets _PyOpcode_PseudoTargets[11] = { [LOAD_CLOSURE-256] = { 0, { LOAD_FAST, 0, 0, 0 } }, [STORE_FAST_MAYBE_NULL-256] = { 0, { STORE_FAST, 0, 0, 0 } }, + [ANNOTATIONS_PLACEHOLDER-256] = { 0, { NOP, 0, 0, 0 } }, [JUMP-256] = { 0, { JUMP_FORWARD, JUMP_BACKWARD, 0, 0 } }, [JUMP_NO_INTERRUPT-256] = { 0, { JUMP_FORWARD, JUMP_BACKWARD_NO_INTERRUPT, 0, 0 } }, [JUMP_IF_FALSE-256] = { 1, { COPY, TO_BOOL, POP_JUMP_IF_FALSE, 0 } }, @@ -2043,7 
+2090,7 @@ const struct pseudo_targets _PyOpcode_PseudoTargets[10] = { #endif // NEED_OPCODE_METADATA static inline bool is_pseudo_target(int pseudo, int target) { - if (pseudo < 256 || pseudo >= 266) { + if (pseudo < 256 || pseudo >= 267) { return false; } for (int i = 0; _PyOpcode_PseudoTargets[pseudo-256].targets[i]; i++) { diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h index b3056e7bb84..79a1a242556 100644 --- a/Include/internal/pycore_opcode_utils.h +++ b/Include/internal/pycore_opcode_utils.h @@ -54,6 +54,11 @@ extern "C" { (opcode) == RAISE_VARARGS || \ (opcode) == RERAISE) +#define IS_RETURN_OPCODE(opcode) \ + (opcode == RETURN_VALUE) +#define IS_RAISE_OPCODE(opcode) \ + (opcode == RAISE_VARARGS || opcode == RERAISE) + /* Flags used in the oparg for MAKE_FUNCTION */ #define MAKE_FUNCTION_DEFAULTS 0x01 diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 4af1fa63ac1..d3674726997 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -69,7 +69,7 @@ typedef struct { typedef struct { uint32_t target; _Py_BackoffCounter temperature; - const struct _PyExecutorObject *executor; + struct _PyExecutorObject *executor; } _PyExitData; typedef struct _PyExecutorObject { @@ -84,6 +84,10 @@ typedef struct _PyExecutorObject { _PyExitData exits[1]; } _PyExecutorObject; +/* If pending deletion list gets large enough, then scan, + * and free any executors that aren't executing + * i.e. any that aren't a thread's current_executor. */ +#define EXECUTOR_DELETE_LIST_MAX 100 // Export for '_opcode' shared extension (JIT compiler). 
PyAPI_FUNC(_PyExecutorObject*) _Py_GetExecutor(PyCodeObject *code, int offset); @@ -304,6 +308,9 @@ static inline int is_terminator(const _PyUOpInstruction *uop) } PyAPI_FUNC(int) _PyDumpExecutors(FILE *out); +#ifdef _Py_TIER2 +extern void _Py_ClearExecutorDeletionList(PyInterpreterState *interp); +#endif #ifdef __cplusplus } diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index d755d03a5fa..3e41e2fd156 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -109,6 +109,8 @@ extern "C" { _Py_atomic_store_ullong_relaxed(&value, new_value) #define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) \ _Py_atomic_load_ullong_relaxed(&value) +#define FT_ATOMIC_ADD_SSIZE(value, new_value) \ + (void)_Py_atomic_add_ssize(&value, new_value) #else #define FT_ATOMIC_LOAD_PTR(value) value @@ -156,6 +158,7 @@ extern "C" { #define FT_ATOMIC_STORE_LLONG_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) value #define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_ADD_SSIZE(value, new_value) (void)(value += new_value) #endif diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h index fa7d9ee36d0..2c2048f7e12 100644 --- a/Include/internal/pycore_pyerrors.h +++ b/Include/internal/pycore_pyerrors.h @@ -60,6 +60,7 @@ extern PyObject* _PyErr_SetImportErrorWithNameFrom( PyObject *, PyObject *, PyObject *); +extern int _PyErr_SetModuleNotFoundError(PyObject *name); /* runtime lifecycle */ @@ -93,13 +94,13 @@ extern void _PyErr_Fetch( PyObject **value, PyObject **traceback); -extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); +PyAPI_FUNC(PyObject*) _PyErr_GetRaisedException(PyThreadState *tstate); PyAPI_FUNC(int) _PyErr_ExceptionMatches( PyThreadState *tstate, PyObject *exc); -extern void _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); 
+PyAPI_FUNC(void) _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); extern void _PyErr_Restore( PyThreadState *tstate, @@ -113,6 +114,7 @@ extern void _PyErr_SetObject( PyObject *value); extern void _PyErr_ChainStackItem(void); +extern void _PyErr_ChainExceptions1Tstate(PyThreadState *, PyObject *); PyAPI_FUNC(void) _PyErr_Clear(PyThreadState *tstate); @@ -148,6 +150,12 @@ PyAPI_FUNC(PyObject*) _PyErr_Format( const char *format, ...); +PyAPI_FUNC(PyObject*) _PyErr_FormatV( + PyThreadState *tstate, + PyObject *exception, + const char *format, + va_list vargs); + extern void _PyErr_NormalizeException( PyThreadState *tstate, PyObject **exc, diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 864e0f5d1db..633e5cf77db 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -9,6 +9,7 @@ extern "C" { #endif #include "pycore_typedefs.h" // _PyRuntimeState +#include "pycore_tstate.h" // Values for PyThreadState.state. A thread must be in the "attached" state @@ -283,6 +284,9 @@ PyAPI_FUNC(const PyConfig*) _Py_GetConfig(void); // See also PyInterpreterState_Get() and _PyInterpreterState_GET(). 
extern PyInterpreterState* _PyGILState_GetInterpreterStateUnsafe(void); +extern PyObject * _Py_GetMainModule(PyThreadState *); +extern int _Py_CheckMainModule(PyObject *module); + #ifndef NDEBUG /* Modern equivalent of assert(PyGILState_Check()) */ static inline void @@ -296,6 +300,34 @@ _Py_AssertHoldsTstateFunc(const char *func) #define _Py_AssertHoldsTstate() #endif +#if !_Py__has_builtin(__builtin_frame_address) && !defined(__GNUC__) && !defined(_MSC_VER) +static uintptr_t return_pointer_as_int(char* p) { + return (uintptr_t)p; +} +#endif + +static inline uintptr_t +_Py_get_machine_stack_pointer(void) { +#if _Py__has_builtin(__builtin_frame_address) || defined(__GNUC__) + return (uintptr_t)__builtin_frame_address(0); +#elif defined(_MSC_VER) + return (uintptr_t)_AddressOfReturnAddress(); +#else + char here; + /* Avoid compiler warning about returning stack address */ + return return_pointer_as_int(&here); +#endif +} + +static inline intptr_t +_Py_RecursionLimit_GetMargin(PyThreadState *tstate) +{ + _PyThreadStateImpl *_tstate = (_PyThreadStateImpl *)tstate; + assert(_tstate->c_stack_hard_limit != 0); + intptr_t here_addr = _Py_get_machine_stack_pointer(); + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, here_addr - (intptr_t)_tstate->c_stack_soft_limit, PYOS_STACK_MARGIN_SHIFT); +} + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h index 0bfc5704dc4..7daed1326af 100644 --- a/Include/internal/pycore_pythonrun.h +++ b/Include/internal/pycore_pythonrun.h @@ -25,6 +25,7 @@ extern int _PyRun_InteractiveLoopObject( PyObject *filename, PyCompilerFlags *flags); +extern int _PyObject_SupportedAsScript(PyObject *); extern const char* _Py_SourceAsString( PyObject *cmd, const char *funcname, diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 2b2e439681f..b182f7825a2 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ 
-61,9 +61,6 @@ extern PyTypeObject _PyExc_MemoryError; }, \ }, \ }, \ - /* A TSS key must be initialized with Py_tss_NEEDS_INIT \ - in accordance with the specification. */ \ - .autoTSSkey = Py_tss_NEEDS_INIT, \ .parser = _parser_runtime_state_INIT, \ .ceval = { \ .pending_mainthread = { \ @@ -233,9 +230,7 @@ extern PyTypeObject _PyExc_MemoryError; ._data = (LITERAL), \ } -#include "pycore_runtime_init_generated.h" - #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ +#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ \ No newline at end of file diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 07a74dd26cd..83301d8aef7 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -585,7 +585,9 @@ extern "C" { INIT_ID(__and__), \ INIT_ID(__anext__), \ INIT_ID(__annotate__), \ + INIT_ID(__annotate_func__), \ INIT_ID(__annotations__), \ + INIT_ID(__annotations_cache__), \ INIT_ID(__args__), \ INIT_ID(__await__), \ INIT_ID(__bases__), \ @@ -788,7 +790,6 @@ extern "C" { INIT_ID(add_done_callback), \ INIT_ID(after_in_child), \ INIT_ID(after_in_parent), \ - INIT_ID(aggregate_class), \ INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ @@ -805,7 +806,6 @@ extern "C" { INIT_ID(ast), \ INIT_ID(athrow), \ INIT_ID(attribute), \ - INIT_ID(authorizer_callback), \ INIT_ID(autocommit), \ INIT_ID(backtick), \ INIT_ID(base), \ @@ -830,6 +830,7 @@ extern "C" { INIT_ID(bytes_per_sep), \ INIT_ID(c_call), \ INIT_ID(c_exception), \ + INIT_ID(c_parameter_type), \ INIT_ID(c_return), \ INIT_ID(cached_datetime_module), \ INIT_ID(cached_statements), \ @@ -876,6 +877,7 @@ extern "C" { INIT_ID(consts), \ INIT_ID(context), \ INIT_ID(contravariant), \ + INIT_ID(conversion), \ INIT_ID(cookie), \ INIT_ID(copy), \ INIT_ID(copyreg), \ @@ -883,6 +885,7 @@ extern "C" { INIT_ID(count), \ INIT_ID(covariant), \ INIT_ID(cwd), \ + INIT_ID(d_parameter_type), \ 
INIT_ID(data), \ INIT_ID(database), \ INIT_ID(day), \ @@ -933,6 +936,7 @@ extern "C" { INIT_ID(exception), \ INIT_ID(existing_file_name), \ INIT_ID(exp), \ + INIT_ID(expression), \ INIT_ID(extend), \ INIT_ID(extra_tokens), \ INIT_ID(facility), \ @@ -962,6 +966,7 @@ extern "C" { INIT_ID(follow_symlinks), \ INIT_ID(format), \ INIT_ID(format_spec), \ + INIT_ID(frame_buffer), \ INIT_ID(from_param), \ INIT_ID(fromlist), \ INIT_ID(fromtimestamp), \ @@ -1019,6 +1024,9 @@ extern "C" { INIT_ID(intern), \ INIT_ID(intersection), \ INIT_ID(interval), \ + INIT_ID(io), \ + INIT_ID(is_compress), \ + INIT_ID(is_raw), \ INIT_ID(is_running), \ INIT_ID(is_struct), \ INIT_ID(isatty), \ @@ -1095,7 +1103,6 @@ extern "C" { INIT_ID(msg), \ INIT_ID(mutex), \ INIT_ID(mycmp), \ - INIT_ID(n_arg), \ INIT_ID(n_fields), \ INIT_ID(n_sequence_fields), \ INIT_ID(n_unnamed_fields), \ @@ -1103,7 +1110,6 @@ extern "C" { INIT_ID(name_from), \ INIT_ID(namespace_separator), \ INIT_ID(namespaces), \ - INIT_ID(narg), \ INIT_ID(ndigits), \ INIT_ID(nested), \ INIT_ID(new_file_name), \ @@ -1142,6 +1148,7 @@ extern "C" { INIT_ID(overlapped), \ INIT_ID(owner), \ INIT_ID(pages), \ + INIT_ID(parameter), \ INIT_ID(parent), \ INIT_ID(password), \ INIT_ID(path), \ @@ -1160,7 +1167,6 @@ extern "C" { INIT_ID(print_file_and_line), \ INIT_ID(priority), \ INIT_ID(progress), \ - INIT_ID(progress_handler), \ INIT_ID(progress_routine), \ INIT_ID(proto), \ INIT_ID(protocol), \ @@ -1266,7 +1272,6 @@ extern "C" { INIT_ID(timetuple), \ INIT_ID(timeunit), \ INIT_ID(top), \ - INIT_ID(trace_callback), \ INIT_ID(traceback), \ INIT_ID(trailers), \ INIT_ID(translate), \ @@ -1303,6 +1308,7 @@ extern "C" { INIT_ID(write_through), \ INIT_ID(year), \ INIT_ID(zdict), \ + INIT_ID(zstd_dict), \ } #define _Py_str_ascii_INIT { \ diff --git a/Include/internal/pycore_runtime_structs.h b/Include/internal/pycore_runtime_structs.h index 6bf3aae7175..12164c7fdd9 100644 --- a/Include/internal/pycore_runtime_structs.h +++ 
b/Include/internal/pycore_runtime_structs.h @@ -223,9 +223,6 @@ struct pyruntimestate { struct _pythread_runtime_state threads; struct _signals_runtime_state signals; - /* Used for the thread state bound to the current thread. */ - Py_tss_t autoTSSkey; - /* Used instead of PyThreadState.trash when there is not current tstate. */ Py_tss_t trashTSSkey; diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 5683b98470d..40ec00c8119 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -63,11 +63,13 @@ extern void _Py_stackref_associate(PyInterpreterState *interp, PyObject *obj, _P static const _PyStackRef PyStackRef_NULL = { .index = 0 }; -#define PyStackRef_None ((_PyStackRef){ .index = 1 } ) -#define PyStackRef_False ((_PyStackRef){ .index = 2 }) -#define PyStackRef_True ((_PyStackRef){ .index = 3 }) +// Use the first 3 even numbers for None, True and False. +// Odd numbers are reserved for (tagged) integers +#define PyStackRef_None ((_PyStackRef){ .index = 2 } ) +#define PyStackRef_False ((_PyStackRef){ .index = 4 }) +#define PyStackRef_True ((_PyStackRef){ .index = 6 }) -#define LAST_PREDEFINED_STACKREF_INDEX 3 +#define INITIAL_STACKREF_INDEX 8 static inline int PyStackRef_IsNull(_PyStackRef ref) @@ -93,9 +95,16 @@ PyStackRef_IsNone(_PyStackRef ref) return _Py_stackref_get_object(ref) == Py_None; } +static inline bool +PyStackRef_IsTaggedInt(_PyStackRef ref) +{ + return (ref.index & 1) == 1; +} + static inline PyObject * _PyStackRef_AsPyObjectBorrow(_PyStackRef ref, const char *filename, int linenumber) { + assert(!PyStackRef_IsTaggedInt(ref)); _Py_stackref_record_borrow(ref, filename, linenumber); return _Py_stackref_get_object(ref); } @@ -125,38 +134,45 @@ _PyStackRef_FromPyObjectSteal(PyObject *obj, const char *filename, int linenumbe #define PyStackRef_FromPyObjectSteal(obj) _PyStackRef_FromPyObjectSteal(_PyObject_CAST(obj), __FILE__, __LINE__) static inline _PyStackRef 
-_PyStackRef_FromPyObjectImmortal(PyObject *obj, const char *filename, int linenumber) +_PyStackRef_FromPyObjectBorrow(PyObject *obj, const char *filename, int linenumber) { - assert(_Py_IsImmortal(obj)); return _Py_stackref_create(obj, filename, linenumber); } -#define PyStackRef_FromPyObjectImmortal(obj) _PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj), __FILE__, __LINE__) +#define PyStackRef_FromPyObjectBorrow(obj) _PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj), __FILE__, __LINE__) static inline void _PyStackRef_CLOSE(_PyStackRef ref, const char *filename, int linenumber) { + if (PyStackRef_IsTaggedInt(ref)) { + return; + } PyObject *obj = _Py_stackref_close(ref, filename, linenumber); Py_DECREF(obj); } #define PyStackRef_CLOSE(REF) _PyStackRef_CLOSE((REF), __FILE__, __LINE__) + static inline void _PyStackRef_XCLOSE(_PyStackRef ref, const char *filename, int linenumber) { if (PyStackRef_IsNull(ref)) { return; } - PyObject *obj = _Py_stackref_close(ref, filename, linenumber); - Py_DECREF(obj); + _PyStackRef_CLOSE(ref, filename, linenumber); } #define PyStackRef_XCLOSE(REF) _PyStackRef_XCLOSE((REF), __FILE__, __LINE__) static inline _PyStackRef _PyStackRef_DUP(_PyStackRef ref, const char *filename, int linenumber) { - PyObject *obj = _Py_stackref_get_object(ref); - Py_INCREF(obj); - return _Py_stackref_create(obj, filename, linenumber); + if (PyStackRef_IsTaggedInt(ref)) { + return ref; + } + else { + PyObject *obj = _Py_stackref_get_object(ref); + Py_INCREF(obj); + return _Py_stackref_create(obj, filename, linenumber); + } } #define PyStackRef_DUP(REF) _PyStackRef_DUP(REF, __FILE__, __LINE__) @@ -210,12 +226,45 @@ _PyStackRef_FromPyObjectNewMortal(PyObject *obj, const char *filename, int linen extern int PyStackRef_Is(_PyStackRef a, _PyStackRef b); +extern bool PyStackRef_IsTaggedInt(_PyStackRef ref); + +extern intptr_t PyStackRef_UntagInt(_PyStackRef ref); + +extern _PyStackRef PyStackRef_TagInt(intptr_t i); + +extern bool 
+PyStackRef_IsNullOrInt(_PyStackRef ref); + #else +#define Py_INT_TAG 3 +#define Py_TAG_REFCNT 1 + +static inline bool +PyStackRef_IsTaggedInt(_PyStackRef i) +{ + return (i.bits & Py_INT_TAG) == Py_INT_TAG; +} + +static inline _PyStackRef +PyStackRef_TagInt(intptr_t i) +{ + assert(Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, (i << 2), 2) == i); + return (_PyStackRef){ .bits = ((((uintptr_t)i) << 2) | Py_INT_TAG) }; +} + +static inline intptr_t +PyStackRef_UntagInt(_PyStackRef i) +{ + assert(PyStackRef_IsTaggedInt(i)); + intptr_t val = (intptr_t)i.bits; + return Py_ARITHMETIC_RIGHT_SHIFT(intptr_t, val, 2); +} + #ifdef Py_GIL_DISABLED -#define Py_TAG_DEFERRED (1) +#define Py_TAG_DEFERRED Py_TAG_REFCNT #define Py_TAG_PTR ((uintptr_t)0) #define Py_TAG_BITS ((uintptr_t)1) @@ -232,9 +281,12 @@ static const _PyStackRef PyStackRef_NULL = { .bits = Py_TAG_DEFERRED}; #define PyStackRef_IsTrue(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_True) #define PyStackRef_IsFalse(ref) (PyStackRef_AsPyObjectBorrow(ref) == Py_False) +#define PyStackRef_IsNullOrInt(stackref) (PyStackRef_IsNull(stackref) || PyStackRef_IsTaggedInt(stackref)) + static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef stackref) { + assert(!PyStackRef_IsTaggedInt(stackref)); PyObject *cleared = ((PyObject *)((stackref).bits & (~Py_TAG_BITS))); return cleared; } @@ -314,15 +366,14 @@ PyStackRef_FromPyObjectNew(PyObject *obj) #define PyStackRef_FromPyObjectNew(obj) PyStackRef_FromPyObjectNew(_PyObject_CAST(obj)) static inline _PyStackRef -PyStackRef_FromPyObjectImmortal(PyObject *obj) +PyStackRef_FromPyObjectBorrow(PyObject *obj) { // Make sure we don't take an already tagged value. 
assert(((uintptr_t)obj & Py_TAG_BITS) == 0); assert(obj != NULL); - assert(_Py_IsImmortal(obj)); return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED }; } -#define PyStackRef_FromPyObjectImmortal(obj) PyStackRef_FromPyObjectImmortal(_PyObject_CAST(obj)) +#define PyStackRef_FromPyObjectBorrow(obj) PyStackRef_FromPyObjectBorrow(_PyObject_CAST(obj)) #define PyStackRef_CLOSE(REF) \ do { \ @@ -391,14 +442,13 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) /* References to immortal objects always have their tag bit set to Py_TAG_REFCNT * as they can (must) have their reclamation deferred */ -#define Py_TAG_BITS 1 -#define Py_TAG_REFCNT 1 +#define Py_TAG_BITS 3 #if _Py_IMMORTAL_FLAGS != Py_TAG_REFCNT # error "_Py_IMMORTAL_FLAGS != Py_TAG_REFCNT" #endif #define BITS_TO_PTR(REF) ((PyObject *)((REF).bits)) -#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_BITS))) +#define BITS_TO_PTR_MASKED(REF) ((PyObject *)(((REF).bits) & (~Py_TAG_REFCNT))) #define PyStackRef_NULL_BITS Py_TAG_REFCNT static const _PyStackRef PyStackRef_NULL = { .bits = PyStackRef_NULL_BITS }; @@ -451,6 +501,7 @@ PyStackRef_RefcountOnObject(_PyStackRef ref) static inline PyObject * PyStackRef_AsPyObjectBorrow(_PyStackRef ref) { + assert(!PyStackRef_IsTaggedInt(ref)); return BITS_TO_PTR_MASKED(ref); } @@ -477,7 +528,7 @@ PyStackRef_FromPyObjectSteal(PyObject *obj) { assert(obj != NULL); #if SIZEOF_VOID_P > 4 - unsigned int tag = obj->ob_flags & Py_TAG_BITS; + unsigned int tag = obj->ob_flags & Py_TAG_REFCNT; #else unsigned int tag = _Py_IsImmortal(obj) ? Py_TAG_REFCNT : 0; #endif @@ -496,12 +547,6 @@ PyStackRef_FromPyObjectStealMortal(PyObject *obj) return ref; } -// Check if a stackref is exactly the same as another stackref, including the -// the deferred bit. This can only be used safely if you know that the deferred -// bits of `a` and `b` match. 
-#define PyStackRef_IsExactly(a, b) \ - (assert(((a).bits & Py_TAG_BITS) == ((b).bits & Py_TAG_BITS)), (a).bits == (b).bits) - static inline _PyStackRef _PyStackRef_FromPyObjectNew(PyObject *obj) { @@ -529,9 +574,8 @@ _PyStackRef_FromPyObjectNewMortal(PyObject *obj) /* Create a new reference from an object with an embedded reference count */ static inline _PyStackRef -PyStackRef_FromPyObjectImmortal(PyObject *obj) +PyStackRef_FromPyObjectBorrow(PyObject *obj) { - assert(_Py_IsImmortal(obj)); return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_REFCNT}; } @@ -554,7 +598,7 @@ PyStackRef_DUP(_PyStackRef ref) static inline bool PyStackRef_IsHeapSafe(_PyStackRef ref) { - return (ref.bits & Py_TAG_BITS) == 0 || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); + return (ref.bits & Py_TAG_BITS) != Py_TAG_REFCNT || ref.bits == PyStackRef_NULL_BITS || _Py_IsImmortal(BITS_TO_PTR_MASKED(ref)); } static inline _PyStackRef @@ -587,6 +631,12 @@ PyStackRef_CLOSE(_PyStackRef ref) } #endif +static inline bool +PyStackRef_IsNullOrInt(_PyStackRef ref) +{ + return PyStackRef_IsNull(ref) || PyStackRef_IsTaggedInt(ref); +} + static inline void PyStackRef_CLOSE_SPECIALIZED(_PyStackRef ref, destructor destruct) { @@ -623,7 +673,7 @@ PyStackRef_XCLOSE(_PyStackRef ref) // Note: this is a macro because MSVC (Windows) has trouble inlining it. 
-#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_BITS)) == ((b).bits & (~Py_TAG_BITS))) +#define PyStackRef_Is(a, b) (((a).bits & (~Py_TAG_REFCNT)) == ((b).bits & (~Py_TAG_REFCNT))) #endif // !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) @@ -726,7 +776,7 @@ _Py_TryXGetStackRef(PyObject **src, _PyStackRef *out) // Like Py_VISIT but for _PyStackRef fields #define _Py_VISIT_STACKREF(ref) \ do { \ - if (!PyStackRef_IsNull(ref)) { \ + if (!PyStackRef_IsNullOrInt(ref)) { \ int vret = _PyGC_VisitStackRef(&(ref), visit, arg); \ if (vret) \ return vret; \ diff --git a/Include/internal/pycore_template.h b/Include/internal/pycore_template.h new file mode 100644 index 00000000000..f2f8bf9912d --- /dev/null +++ b/Include/internal/pycore_template.h @@ -0,0 +1,26 @@ +#ifndef Py_INTERNAL_TEMPLATE_H +#define Py_INTERNAL_TEMPLATE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyTypeObject _PyTemplate_Type; +extern PyTypeObject _PyTemplateIter_Type; + +#define _PyTemplate_CheckExact(op) Py_IS_TYPE((op), &_PyTemplate_Type) +#define _PyTemplateIter_CheckExact(op) Py_IS_TYPE((op), &_PyTemplateIter_Type) + +extern PyObject *_PyTemplate_Concat(PyObject *self, PyObject *other); + +PyAPI_FUNC(PyObject *) _PyTemplate_Build(PyObject *strings, PyObject *interpolations); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h index 571cd6249f2..5de1f719a2f 100644 --- a/Include/internal/pycore_token.h +++ b/Include/internal/pycore_token.h @@ -75,10 +75,13 @@ extern "C" { #define FSTRING_START 59 #define FSTRING_MIDDLE 60 #define FSTRING_END 61 -#define COMMENT 62 -#define NL 63 -#define ERRORTOKEN 64 -#define N_TOKENS 66 +#define TSTRING_START 62 +#define TSTRING_MIDDLE 63 +#define TSTRING_END 64 +#define COMMENT 65 +#define NL 66 +#define ERRORTOKEN 67 +#define N_TOKENS 69 #define NT_OFFSET 256 /* Special definitions for 
cooperation with parser */ @@ -91,7 +94,8 @@ extern "C" { (x) == INDENT || \ (x) == DEDENT) #define ISSTRINGLIT(x) ((x) == STRING || \ - (x) == FSTRING_MIDDLE) + (x) == FSTRING_MIDDLE || \ + (x) == TSTRING_MIDDLE) // Export these 4 symbols for 'test_peg_generator' diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h index 741108a957a..d71dd288699 100644 --- a/Include/internal/pycore_traceback.h +++ b/Include/internal/pycore_traceback.h @@ -99,6 +99,9 @@ extern int _PyTraceBack_Print( extern int _Py_WriteIndentedMargin(int, const char*, PyObject *); extern int _Py_WriteIndent(int, PyObject *); +// Export for the faulthandler module +PyAPI_FUNC(void) _Py_DumpStack(int fd); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 5fea3247e8f..3791b913c17 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -139,14 +139,18 @@ extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful( // Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape // chars. // Export for test_peg_generator. -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal( +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2( const char *string, /* Unicode-Escape encoded string */ Py_ssize_t length, /* size of string */ const char *errors, /* error handling */ Py_ssize_t *consumed, /* bytes consumed */ - const char **first_invalid_escape); /* on return, points to first - invalid escaped char in - string. */ + int *first_invalid_escape_char, /* on return, if not -1, contain the first + invalid escaped char (<= 0xff) or invalid + octal escape (> 0xff) in string. */ + const char **first_invalid_escape_ptr); /* on return, if not NULL, may + point to the first invalid escaped + char in string. + May be NULL if errors is not NULL. 
*/ /* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */ @@ -247,6 +251,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( Py_UCS4 *maxchar, int forward); +/* Dedent a string. + Behaviour is expected to be an exact match of `textwrap.dedent`. + Return a new reference on success, NULL with exception set on error. + */ +extern PyObject* _PyUnicode_Dedent(PyObject *unicode); + /* --- Misc functions ----------------------------------------------------- */ extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int); diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 1e1e32bbd42..c0f5f2b17f6 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -100,10 +100,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__annotate_func__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(__annotations__); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__annotations_cache__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(__args__); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -912,10 +920,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(aggregate_class); - _PyUnicode_InternStatic(interp, &string); - 
assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(alias); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -980,10 +984,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(authorizer_callback); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(autocommit); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1080,6 +1080,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(c_parameter_type); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(c_return); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1264,6 +1268,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(conversion); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(cookie); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1292,6 +1300,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(d_parameter_type); + 
_PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(data); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1492,6 +1504,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(expression); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(extend); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1608,6 +1624,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(frame_buffer); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(from_param); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -1836,6 +1856,18 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(io); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(is_compress); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(is_raw); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = 
&_Py_ID(is_running); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2140,10 +2172,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(n_arg); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(n_fields); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2172,10 +2200,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(narg); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ndigits); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2328,6 +2352,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(parameter); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(parent); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2400,10 +2428,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(progress_handler); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); 
string = &_Py_ID(progress_routine); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2824,10 +2848,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); - string = &_Py_ID(trace_callback); - _PyUnicode_InternStatic(interp, &string); - assert(_PyUnicode_CheckConsistency(string, 1)); - assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(traceback); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2972,6 +2992,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(zstd_dict); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(empty); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index e9a536919da..d08799487fd 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -23,134 +23,149 @@ extern "C" { #define _BINARY_OP_SUBSCR_DICT 311 #define _BINARY_OP_SUBSCR_INIT_CALL 312 #define _BINARY_OP_SUBSCR_LIST_INT 313 -#define _BINARY_OP_SUBSCR_STR_INT 314 -#define _BINARY_OP_SUBSCR_TUPLE_INT 315 -#define _BINARY_OP_SUBTRACT_FLOAT 316 -#define _BINARY_OP_SUBTRACT_INT 317 -#define _BINARY_SLICE 318 +#define _BINARY_OP_SUBSCR_LIST_SLICE 314 +#define _BINARY_OP_SUBSCR_STR_INT 315 +#define _BINARY_OP_SUBSCR_TUPLE_INT 316 +#define _BINARY_OP_SUBTRACT_FLOAT 317 +#define _BINARY_OP_SUBTRACT_INT 318 +#define _BINARY_SLICE 319 +#define _BUILD_INTERPOLATION BUILD_INTERPOLATION #define _BUILD_LIST BUILD_LIST #define _BUILD_MAP 
BUILD_MAP #define _BUILD_SET BUILD_SET #define _BUILD_SLICE BUILD_SLICE #define _BUILD_STRING BUILD_STRING +#define _BUILD_TEMPLATE BUILD_TEMPLATE #define _BUILD_TUPLE BUILD_TUPLE -#define _CALL_BUILTIN_CLASS 319 -#define _CALL_BUILTIN_FAST 320 -#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 321 -#define _CALL_BUILTIN_O 322 +#define _CALL_BUILTIN_CLASS 320 +#define _CALL_BUILTIN_FAST 321 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 322 +#define _CALL_BUILTIN_O 323 #define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 #define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 -#define _CALL_ISINSTANCE CALL_ISINSTANCE -#define _CALL_KW_NON_PY 323 -#define _CALL_LEN CALL_LEN -#define _CALL_LIST_APPEND CALL_LIST_APPEND -#define _CALL_METHOD_DESCRIPTOR_FAST 324 -#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 325 -#define _CALL_METHOD_DESCRIPTOR_NOARGS 326 -#define _CALL_METHOD_DESCRIPTOR_O 327 -#define _CALL_NON_PY_GENERAL 328 -#define _CALL_STR_1 329 -#define _CALL_TUPLE_1 330 -#define _CALL_TYPE_1 CALL_TYPE_1 -#define _CHECK_AND_ALLOCATE_OBJECT 331 -#define _CHECK_ATTR_CLASS 332 -#define _CHECK_ATTR_METHOD_LAZY_DICT 333 -#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 334 +#define _CALL_ISINSTANCE 324 +#define _CALL_KW_NON_PY 325 +#define _CALL_LEN 326 +#define _CALL_LIST_APPEND 327 +#define _CALL_METHOD_DESCRIPTOR_FAST 328 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 329 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 330 +#define _CALL_METHOD_DESCRIPTOR_O 331 +#define _CALL_NON_PY_GENERAL 332 +#define _CALL_STR_1 333 +#define _CALL_TUPLE_1 334 +#define _CALL_TYPE_1 335 +#define _CHECK_AND_ALLOCATE_OBJECT 336 +#define _CHECK_ATTR_CLASS 337 +#define _CHECK_ATTR_METHOD_LAZY_DICT 338 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 339 #define _CHECK_EG_MATCH CHECK_EG_MATCH #define _CHECK_EXC_MATCH CHECK_EXC_MATCH -#define _CHECK_FUNCTION 335 -#define _CHECK_FUNCTION_EXACT_ARGS 336 -#define _CHECK_FUNCTION_VERSION 337 -#define _CHECK_FUNCTION_VERSION_INLINE 338 -#define _CHECK_FUNCTION_VERSION_KW 339 -#define 
_CHECK_IS_NOT_PY_CALLABLE 340 -#define _CHECK_IS_NOT_PY_CALLABLE_KW 341 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 342 -#define _CHECK_METHOD_VERSION 343 -#define _CHECK_METHOD_VERSION_KW 344 -#define _CHECK_PEP_523 345 -#define _CHECK_PERIODIC 346 -#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 347 -#define _CHECK_STACK_SPACE 348 -#define _CHECK_STACK_SPACE_OPERAND 349 -#define _CHECK_VALIDITY 350 -#define _COMPARE_OP 351 -#define _COMPARE_OP_FLOAT 352 -#define _COMPARE_OP_INT 353 -#define _COMPARE_OP_STR 354 -#define _CONTAINS_OP 355 -#define _CONTAINS_OP_DICT 356 -#define _CONTAINS_OP_SET 357 +#define _CHECK_FUNCTION 340 +#define _CHECK_FUNCTION_EXACT_ARGS 341 +#define _CHECK_FUNCTION_VERSION 342 +#define _CHECK_FUNCTION_VERSION_INLINE 343 +#define _CHECK_FUNCTION_VERSION_KW 344 +#define _CHECK_IS_NOT_PY_CALLABLE 345 +#define _CHECK_IS_NOT_PY_CALLABLE_KW 346 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 347 +#define _CHECK_METHOD_VERSION 348 +#define _CHECK_METHOD_VERSION_KW 349 +#define _CHECK_PEP_523 350 +#define _CHECK_PERIODIC 351 +#define _CHECK_PERIODIC_IF_NOT_YIELD_FROM 352 +#define _CHECK_RECURSION_REMAINING 353 +#define _CHECK_STACK_SPACE 354 +#define _CHECK_STACK_SPACE_OPERAND 355 +#define _CHECK_VALIDITY 356 +#define _COMPARE_OP 357 +#define _COMPARE_OP_FLOAT 358 +#define _COMPARE_OP_INT 359 +#define _COMPARE_OP_STR 360 +#define _CONTAINS_OP 361 +#define _CONTAINS_OP_DICT 362 +#define _CONTAINS_OP_SET 363 #define _CONVERT_VALUE CONVERT_VALUE #define _COPY COPY #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 358 +#define _CREATE_INIT_FRAME 364 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 359 +#define _DEOPT 365 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 360 -#define _DO_CALL_FUNCTION_EX 361 -#define _DO_CALL_KW 362 +#define 
_DO_CALL 366 +#define _DO_CALL_FUNCTION_EX 367 +#define _DO_CALL_KW 368 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 363 +#define _ERROR_POP_N 369 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 364 -#define _EXPAND_METHOD_KW 365 -#define _FATAL_ERROR 366 +#define _EXPAND_METHOD 370 +#define _EXPAND_METHOD_KW 371 +#define _FATAL_ERROR 372 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 367 -#define _FOR_ITER_GEN_FRAME 368 -#define _FOR_ITER_TIER_TWO 369 +#define _FOR_ITER 373 +#define _FOR_ITER_GEN_FRAME 374 +#define _FOR_ITER_TIER_TWO 375 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 370 -#define _GUARD_DORV_NO_DICT 371 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 372 -#define _GUARD_GLOBALS_VERSION 373 -#define _GUARD_IS_FALSE_POP 374 -#define _GUARD_IS_NONE_POP 375 -#define _GUARD_IS_NOT_NONE_POP 376 -#define _GUARD_IS_TRUE_POP 377 -#define _GUARD_KEYS_VERSION 378 -#define _GUARD_NOS_DICT 379 -#define _GUARD_NOS_FLOAT 380 -#define _GUARD_NOS_INT 381 -#define _GUARD_NOS_LIST 382 -#define _GUARD_NOS_TUPLE 383 -#define _GUARD_NOS_UNICODE 384 -#define _GUARD_NOT_EXHAUSTED_LIST 385 -#define _GUARD_NOT_EXHAUSTED_RANGE 386 -#define _GUARD_NOT_EXHAUSTED_TUPLE 387 -#define _GUARD_TOS_ANY_SET 388 -#define _GUARD_TOS_DICT 389 -#define _GUARD_TOS_FLOAT 390 -#define _GUARD_TOS_INT 391 -#define _GUARD_TOS_LIST 392 -#define _GUARD_TOS_TUPLE 393 -#define _GUARD_TOS_UNICODE 394 -#define _GUARD_TYPE_VERSION 395 -#define _GUARD_TYPE_VERSION_AND_LOCK 396 +#define _GUARD_BINARY_OP_EXTEND 376 +#define _GUARD_CALLABLE_ISINSTANCE 377 +#define _GUARD_CALLABLE_LEN 378 +#define _GUARD_CALLABLE_LIST_APPEND 379 +#define _GUARD_CALLABLE_STR_1 380 +#define _GUARD_CALLABLE_TUPLE_1 381 +#define _GUARD_CALLABLE_TYPE_1 
382 +#define _GUARD_DORV_NO_DICT 383 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384 +#define _GUARD_GLOBALS_VERSION 385 +#define _GUARD_IS_FALSE_POP 386 +#define _GUARD_IS_NONE_POP 387 +#define _GUARD_IS_NOT_NONE_POP 388 +#define _GUARD_IS_TRUE_POP 389 +#define _GUARD_KEYS_VERSION 390 +#define _GUARD_NOS_DICT 391 +#define _GUARD_NOS_FLOAT 392 +#define _GUARD_NOS_INT 393 +#define _GUARD_NOS_LIST 394 +#define _GUARD_NOS_NOT_NULL 395 +#define _GUARD_NOS_NULL 396 +#define _GUARD_NOS_TUPLE 397 +#define _GUARD_NOS_UNICODE 398 +#define _GUARD_NOT_EXHAUSTED_LIST 399 +#define _GUARD_NOT_EXHAUSTED_RANGE 400 +#define _GUARD_NOT_EXHAUSTED_TUPLE 401 +#define _GUARD_THIRD_NULL 402 +#define _GUARD_TOS_ANY_SET 403 +#define _GUARD_TOS_DICT 404 +#define _GUARD_TOS_FLOAT 405 +#define _GUARD_TOS_INT 406 +#define _GUARD_TOS_LIST 407 +#define _GUARD_TOS_SLICE 408 +#define _GUARD_TOS_TUPLE 409 +#define _GUARD_TOS_UNICODE 410 +#define _GUARD_TYPE_VERSION 411 +#define _GUARD_TYPE_VERSION_AND_LOCK 412 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 397 -#define _INIT_CALL_PY_EXACT_ARGS 398 -#define _INIT_CALL_PY_EXACT_ARGS_0 399 -#define _INIT_CALL_PY_EXACT_ARGS_1 400 -#define _INIT_CALL_PY_EXACT_ARGS_2 401 -#define _INIT_CALL_PY_EXACT_ARGS_3 402 -#define _INIT_CALL_PY_EXACT_ARGS_4 403 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413 +#define _INIT_CALL_PY_EXACT_ARGS 414 +#define _INIT_CALL_PY_EXACT_ARGS_0 415 +#define _INIT_CALL_PY_EXACT_ARGS_1 416 +#define _INIT_CALL_PY_EXACT_ARGS_2 417 +#define _INIT_CALL_PY_EXACT_ARGS_3 418 +#define _INIT_CALL_PY_EXACT_ARGS_4 419 +#define _INSERT_NULL 420 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -160,163 +175,170 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE 
INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 404 +#define _IS_NONE 421 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 405 -#define _ITER_CHECK_RANGE 406 -#define _ITER_CHECK_TUPLE 407 -#define _ITER_JUMP_LIST 408 -#define _ITER_JUMP_RANGE 409 -#define _ITER_JUMP_TUPLE 410 -#define _ITER_NEXT_LIST 411 -#define _ITER_NEXT_LIST_TIER_TWO 412 -#define _ITER_NEXT_RANGE 413 -#define _ITER_NEXT_TUPLE 414 -#define _JUMP_TO_TOP 415 +#define _ITER_CHECK_LIST 422 +#define _ITER_CHECK_RANGE 423 +#define _ITER_CHECK_TUPLE 424 +#define _ITER_JUMP_LIST 425 +#define _ITER_JUMP_RANGE 426 +#define _ITER_JUMP_TUPLE 427 +#define _ITER_NEXT_LIST 428 +#define _ITER_NEXT_LIST_TIER_TWO 429 +#define _ITER_NEXT_RANGE 430 +#define _ITER_NEXT_TUPLE 431 +#define _JUMP_TO_TOP 432 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 416 -#define _LOAD_ATTR_CLASS 417 +#define _LOAD_ATTR 433 +#define _LOAD_ATTR_CLASS 434 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 418 -#define _LOAD_ATTR_METHOD_LAZY_DICT 419 -#define _LOAD_ATTR_METHOD_NO_DICT 420 -#define _LOAD_ATTR_METHOD_WITH_VALUES 421 -#define _LOAD_ATTR_MODULE 422 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 423 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 424 -#define _LOAD_ATTR_PROPERTY_FRAME 425 -#define _LOAD_ATTR_SLOT 426 -#define _LOAD_ATTR_WITH_HINT 427 +#define _LOAD_ATTR_INSTANCE_VALUE 435 +#define _LOAD_ATTR_METHOD_LAZY_DICT 436 +#define _LOAD_ATTR_METHOD_NO_DICT 437 +#define _LOAD_ATTR_METHOD_WITH_VALUES 438 +#define _LOAD_ATTR_MODULE 439 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441 +#define _LOAD_ATTR_PROPERTY_FRAME 442 +#define _LOAD_ATTR_SLOT 443 +#define _LOAD_ATTR_WITH_HINT 444 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 428 +#define _LOAD_BYTECODE 445 #define _LOAD_COMMON_CONSTANT 
LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 429 -#define _LOAD_CONST_INLINE_BORROW 430 -#define _LOAD_CONST_MORTAL LOAD_CONST_MORTAL +#define _LOAD_CONST_INLINE 446 +#define _LOAD_CONST_INLINE_BORROW 447 +#define _LOAD_CONST_UNDER_INLINE 448 +#define _LOAD_CONST_UNDER_INLINE_BORROW 449 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 431 -#define _LOAD_FAST_0 432 -#define _LOAD_FAST_1 433 -#define _LOAD_FAST_2 434 -#define _LOAD_FAST_3 435 -#define _LOAD_FAST_4 436 -#define _LOAD_FAST_5 437 -#define _LOAD_FAST_6 438 -#define _LOAD_FAST_7 439 +#define _LOAD_FAST 450 +#define _LOAD_FAST_0 451 +#define _LOAD_FAST_1 452 +#define _LOAD_FAST_2 453 +#define _LOAD_FAST_3 454 +#define _LOAD_FAST_4 455 +#define _LOAD_FAST_5 456 +#define _LOAD_FAST_6 457 +#define _LOAD_FAST_7 458 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 440 -#define _LOAD_FAST_BORROW_0 441 -#define _LOAD_FAST_BORROW_1 442 -#define _LOAD_FAST_BORROW_2 443 -#define _LOAD_FAST_BORROW_3 444 -#define _LOAD_FAST_BORROW_4 445 -#define _LOAD_FAST_BORROW_5 446 -#define _LOAD_FAST_BORROW_6 447 -#define _LOAD_FAST_BORROW_7 448 +#define _LOAD_FAST_BORROW 459 +#define _LOAD_FAST_BORROW_0 460 +#define _LOAD_FAST_BORROW_1 461 +#define _LOAD_FAST_BORROW_2 462 +#define _LOAD_FAST_BORROW_3 463 +#define _LOAD_FAST_BORROW_4 464 +#define _LOAD_FAST_BORROW_5 465 +#define _LOAD_FAST_BORROW_6 466 +#define _LOAD_FAST_BORROW_7 467 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 449 -#define _LOAD_GLOBAL_BUILTINS 450 -#define _LOAD_GLOBAL_MODULE 451 +#define _LOAD_GLOBAL 468 +#define _LOAD_GLOBAL_BUILTINS 469 +#define _LOAD_GLOBAL_MODULE 470 #define 
_LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 452 -#define _LOAD_SMALL_INT_0 453 -#define _LOAD_SMALL_INT_1 454 -#define _LOAD_SMALL_INT_2 455 -#define _LOAD_SMALL_INT_3 456 -#define _LOAD_SPECIAL LOAD_SPECIAL +#define _LOAD_SMALL_INT 471 +#define _LOAD_SMALL_INT_0 472 +#define _LOAD_SMALL_INT_1 473 +#define _LOAD_SMALL_INT_2 474 +#define _LOAD_SMALL_INT_3 475 +#define _LOAD_SPECIAL 476 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 457 +#define _MAKE_CALLARGS_A_TUPLE 477 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 458 +#define _MAKE_WARM 478 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 459 -#define _MAYBE_EXPAND_METHOD_KW 460 -#define _MONITOR_CALL 461 -#define _MONITOR_CALL_KW 462 -#define _MONITOR_JUMP_BACKWARD 463 -#define _MONITOR_RESUME 464 +#define _MAYBE_EXPAND_METHOD 479 +#define _MAYBE_EXPAND_METHOD_KW 480 +#define _MONITOR_CALL 481 +#define _MONITOR_CALL_KW 482 +#define _MONITOR_JUMP_BACKWARD 483 +#define _MONITOR_RESUME 484 #define _NOP NOP +#define _POP_CALL 485 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 +#define _POP_CALL_ONE 487 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 +#define _POP_CALL_TWO 489 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 465 -#define _POP_JUMP_IF_TRUE 466 +#define _POP_JUMP_IF_FALSE 491 +#define _POP_JUMP_IF_TRUE 492 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 467 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 468 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 469 +#define _POP_TOP_LOAD_CONST_INLINE 493 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 +#define _POP_TWO 495 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 #define 
_PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 470 +#define _PUSH_FRAME 497 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 471 -#define _PY_FRAME_GENERAL 472 -#define _PY_FRAME_KW 473 -#define _QUICKEN_RESUME 474 -#define _REPLACE_WITH_TRUE 475 +#define _PUSH_NULL_CONDITIONAL 498 +#define _PY_FRAME_GENERAL 499 +#define _PY_FRAME_KW 500 +#define _QUICKEN_RESUME 501 +#define _REPLACE_WITH_TRUE 502 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 476 -#define _SEND 477 -#define _SEND_GEN_FRAME 478 +#define _SAVE_RETURN_OFFSET 503 +#define _SEND 504 +#define _SEND_GEN_FRAME 505 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 479 -#define _STORE_ATTR 480 -#define _STORE_ATTR_INSTANCE_VALUE 481 -#define _STORE_ATTR_SLOT 482 -#define _STORE_ATTR_WITH_HINT 483 +#define _START_EXECUTOR 506 +#define _STORE_ATTR 507 +#define _STORE_ATTR_INSTANCE_VALUE 508 +#define _STORE_ATTR_SLOT 509 +#define _STORE_ATTR_WITH_HINT 510 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 484 -#define _STORE_FAST_0 485 -#define _STORE_FAST_1 486 -#define _STORE_FAST_2 487 -#define _STORE_FAST_3 488 -#define _STORE_FAST_4 489 -#define _STORE_FAST_5 490 -#define _STORE_FAST_6 491 -#define _STORE_FAST_7 492 +#define _STORE_FAST 511 +#define _STORE_FAST_0 512 +#define _STORE_FAST_1 513 +#define _STORE_FAST_2 514 +#define _STORE_FAST_3 515 +#define _STORE_FAST_4 516 +#define _STORE_FAST_5 517 +#define _STORE_FAST_6 518 +#define _STORE_FAST_7 519 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 493 -#define _STORE_SUBSCR 494 -#define _STORE_SUBSCR_DICT 495 -#define _STORE_SUBSCR_LIST_INT 496 +#define _STORE_SLICE 520 
+#define _STORE_SUBSCR 521 +#define _STORE_SUBSCR_DICT 522 +#define _STORE_SUBSCR_LIST_INT 523 #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 497 -#define _TO_BOOL 498 +#define _TIER2_RESUME_CHECK 524 +#define _TO_BOOL 525 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 499 +#define _TO_BOOL_LIST 526 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 500 +#define _TO_BOOL_STR 527 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 501 -#define _UNPACK_SEQUENCE_LIST 502 -#define _UNPACK_SEQUENCE_TUPLE 503 -#define _UNPACK_SEQUENCE_TWO_TUPLE 504 +#define _UNPACK_SEQUENCE 528 +#define _UNPACK_SEQUENCE_LIST 529 +#define _UNPACK_SEQUENCE_TUPLE 530 +#define _UNPACK_SEQUENCE_TWO_TUPLE 531 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 504 +#define MAX_UOP_ID 531 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 8fa50ff2c29..5ebe124983b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -45,8 +45,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_LOAD_FAST_BORROW_LOAD_FAST_BORROW] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, - [_LOAD_CONST_MORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, - [_LOAD_CONST_IMMORTAL] = HAS_ARG_FLAG | HAS_CONST_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG, [_LOAD_SMALL_INT_0] = 0, [_LOAD_SMALL_INT_1] = 0, [_LOAD_SMALL_INT_2] = 0, @@ -64,6 +63,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TWO] = HAS_ESCAPES_FLAG, 
[_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, [_END_SEND] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, @@ -74,7 +74,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_TO_BOOL_INT] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_LIST] = HAS_EXIT_FLAG, [_GUARD_TOS_LIST] = HAS_EXIT_FLAG, - [_TO_BOOL_LIST] = 0, + [_GUARD_TOS_SLICE] = HAS_EXIT_FLAG, + [_TO_BOOL_LIST] = HAS_ESCAPES_FLAG, [_TO_BOOL_NONE] = HAS_EXIT_FLAG, [_GUARD_NOS_UNICODE] = HAS_EXIT_FLAG, [_GUARD_TOS_UNICODE] = HAS_EXIT_FLAG, @@ -98,10 +99,11 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_OP_SUBSCR_LIST_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_OP_SUBSCR_STR_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_TUPLE] = HAS_EXIT_FLAG, [_GUARD_TOS_TUPLE] = HAS_EXIT_FLAG, - [_BINARY_OP_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, + [_BINARY_OP_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_GUARD_NOS_DICT] = HAS_EXIT_FLAG, [_GUARD_TOS_DICT] = HAS_EXIT_FLAG, [_BINARY_OP_SUBSCR_DICT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -128,8 +130,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DELETE_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | 
HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -149,7 +151,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG, [_COPY_FREE_VARS] = HAS_ARG_FLAG, - [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BUILD_INTERPOLATION] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BUILD_TEMPLATE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BUILD_TUPLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, [_BUILD_LIST] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_LIST_EXTEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -169,9 +173,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_INSTANCE_VALUE] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_MODULE] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_LOAD_ATTR_SLOT] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_SLOT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_CHECK_ATTR_CLASS] = HAS_EXIT_FLAG, - [_LOAD_ATTR_CLASS] = 0, + [_LOAD_ATTR_CLASS] = HAS_ESCAPES_FLAG, [_LOAD_ATTR_PROPERTY_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_GUARD_DORV_NO_DICT] = HAS_EXIT_FLAG, [_STORE_ATTR_INSTANCE_VALUE] = HAS_ESCAPES_FLAG, @@ -181,7 +185,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_COMPARE_OP_STR] = HAS_ARG_FLAG, - [_IS_OP] = HAS_ARG_FLAG, + [_IS_OP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_TOS_ANY_SET] = HAS_DEOPT_FLAG, [_CONTAINS_OP_SET] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -190,7 +194,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_EXC_MATCH] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, 
[_IMPORT_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_IMPORT_FROM] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_IS_NONE] = 0, + [_IS_NONE] = HAS_ESCAPES_FLAG, [_GET_LEN] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MATCH_CLASS] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_MATCH_MAPPING] = 0, @@ -209,7 +213,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG, [_FOR_ITER_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_LOAD_SPECIAL] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_INSERT_NULL] = 0, + [_LOAD_SPECIAL] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_EXC_INFO] = 0, [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG, @@ -233,6 +238,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = HAS_DEOPT_FLAG, [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CHECK_RECURSION_REMAINING] = HAS_DEOPT_FLAG, [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_PURE_FLAG, @@ -240,9 +246,15 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_PUSH_FRAME] = 0, - [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, - [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_NOS_NULL] = HAS_DEOPT_FLAG, + [_GUARD_NOS_NOT_NULL] = HAS_EXIT_FLAG, + [_GUARD_THIRD_NULL] = HAS_DEOPT_FLAG, + [_GUARD_CALLABLE_TYPE_1] = HAS_DEOPT_FLAG, + [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_STR_1] = HAS_DEOPT_FLAG, + [_CALL_STR_1] = 
HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_TUPLE_1] = HAS_DEOPT_FLAG, + [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CHECK_AND_ALLOCATE_OBJECT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CREATE_INIT_FRAME] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, @@ -250,8 +262,11 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_LEN] = HAS_DEOPT_FLAG, + [_CALL_LEN] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_ISINSTANCE] = HAS_DEOPT_FLAG, + [_CALL_ISINSTANCE] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_CALLABLE_LIST_APPEND] = HAS_DEOPT_FLAG, [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -268,7 +283,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG, [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CONVERT_VALUE] = HAS_ARG_FLAG 
| HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, @@ -288,10 +303,18 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, [_POP_TOP_LOAD_CONST_INLINE] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, - [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, - [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_CALL] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO] = HAS_ESCAPES_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = HAS_ESCAPES_FLAG, + [_LOAD_CONST_UNDER_INLINE] = 0, + [_LOAD_CONST_UNDER_INLINE_BORROW] = 0, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, - [_START_EXECUTOR] = HAS_ESCAPES_FLAG, + [_START_EXECUTOR] = 0, [_MAKE_WARM] = 0, [_FATAL_ERROR] = 0, [_DEOPT] = 0, @@ -320,16 +343,19 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_BINARY_OP_SUBSCR_DICT] = "_BINARY_OP_SUBSCR_DICT", [_BINARY_OP_SUBSCR_INIT_CALL] = "_BINARY_OP_SUBSCR_INIT_CALL", [_BINARY_OP_SUBSCR_LIST_INT] = "_BINARY_OP_SUBSCR_LIST_INT", + [_BINARY_OP_SUBSCR_LIST_SLICE] = "_BINARY_OP_SUBSCR_LIST_SLICE", [_BINARY_OP_SUBSCR_STR_INT] = "_BINARY_OP_SUBSCR_STR_INT", [_BINARY_OP_SUBSCR_TUPLE_INT] = "_BINARY_OP_SUBSCR_TUPLE_INT", [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT", [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT", [_BINARY_SLICE] = "_BINARY_SLICE", + [_BUILD_INTERPOLATION] = "_BUILD_INTERPOLATION", [_BUILD_LIST] = "_BUILD_LIST", [_BUILD_MAP] = "_BUILD_MAP", [_BUILD_SET] = "_BUILD_SET", [_BUILD_SLICE] = "_BUILD_SLICE", [_BUILD_STRING] = "_BUILD_STRING", + [_BUILD_TEMPLATE] = "_BUILD_TEMPLATE", 
[_BUILD_TUPLE] = "_BUILD_TUPLE", [_CALL_BUILTIN_CLASS] = "_CALL_BUILTIN_CLASS", [_CALL_BUILTIN_FAST] = "_CALL_BUILTIN_FAST", @@ -368,6 +394,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_PERIODIC] = "_CHECK_PERIODIC", [_CHECK_PERIODIC_IF_NOT_YIELD_FROM] = "_CHECK_PERIODIC_IF_NOT_YIELD_FROM", + [_CHECK_RECURSION_REMAINING] = "_CHECK_RECURSION_REMAINING", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", @@ -410,6 +437,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GET_LEN] = "_GET_LEN", [_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER", [_GUARD_BINARY_OP_EXTEND] = "_GUARD_BINARY_OP_EXTEND", + [_GUARD_CALLABLE_ISINSTANCE] = "_GUARD_CALLABLE_ISINSTANCE", + [_GUARD_CALLABLE_LEN] = "_GUARD_CALLABLE_LEN", + [_GUARD_CALLABLE_LIST_APPEND] = "_GUARD_CALLABLE_LIST_APPEND", + [_GUARD_CALLABLE_STR_1] = "_GUARD_CALLABLE_STR_1", + [_GUARD_CALLABLE_TUPLE_1] = "_GUARD_CALLABLE_TUPLE_1", + [_GUARD_CALLABLE_TYPE_1] = "_GUARD_CALLABLE_TYPE_1", [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", @@ -422,16 +455,20 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_NOS_FLOAT] = "_GUARD_NOS_FLOAT", [_GUARD_NOS_INT] = "_GUARD_NOS_INT", [_GUARD_NOS_LIST] = "_GUARD_NOS_LIST", + [_GUARD_NOS_NOT_NULL] = "_GUARD_NOS_NOT_NULL", + [_GUARD_NOS_NULL] = "_GUARD_NOS_NULL", [_GUARD_NOS_TUPLE] = "_GUARD_NOS_TUPLE", [_GUARD_NOS_UNICODE] = "_GUARD_NOS_UNICODE", [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", [_GUARD_NOT_EXHAUSTED_RANGE] = "_GUARD_NOT_EXHAUSTED_RANGE", [_GUARD_NOT_EXHAUSTED_TUPLE] = "_GUARD_NOT_EXHAUSTED_TUPLE", + [_GUARD_THIRD_NULL] = "_GUARD_THIRD_NULL", [_GUARD_TOS_ANY_SET] = "_GUARD_TOS_ANY_SET", [_GUARD_TOS_DICT] = "_GUARD_TOS_DICT", [_GUARD_TOS_FLOAT] = 
"_GUARD_TOS_FLOAT", [_GUARD_TOS_INT] = "_GUARD_TOS_INT", [_GUARD_TOS_LIST] = "_GUARD_TOS_LIST", + [_GUARD_TOS_SLICE] = "_GUARD_TOS_SLICE", [_GUARD_TOS_TUPLE] = "_GUARD_TOS_TUPLE", [_GUARD_TOS_UNICODE] = "_GUARD_TOS_UNICODE", [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", @@ -445,6 +482,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_INIT_CALL_PY_EXACT_ARGS_2] = "_INIT_CALL_PY_EXACT_ARGS_2", [_INIT_CALL_PY_EXACT_ARGS_3] = "_INIT_CALL_PY_EXACT_ARGS_3", [_INIT_CALL_PY_EXACT_ARGS_4] = "_INIT_CALL_PY_EXACT_ARGS_4", + [_INSERT_NULL] = "_INSERT_NULL", [_IS_NONE] = "_IS_NONE", [_IS_OP] = "_IS_OP", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", @@ -470,10 +508,11 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_COMMON_CONSTANT] = "_LOAD_COMMON_CONSTANT", - [_LOAD_CONST_IMMORTAL] = "_LOAD_CONST_IMMORTAL", + [_LOAD_CONST] = "_LOAD_CONST", [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", - [_LOAD_CONST_MORTAL] = "_LOAD_CONST_MORTAL", + [_LOAD_CONST_UNDER_INLINE] = "_LOAD_CONST_UNDER_INLINE", + [_LOAD_CONST_UNDER_INLINE_BORROW] = "_LOAD_CONST_UNDER_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", [_LOAD_FAST_0] = "_LOAD_FAST_0", @@ -523,10 +562,17 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_MAYBE_EXPAND_METHOD] = "_MAYBE_EXPAND_METHOD", [_MAYBE_EXPAND_METHOD_KW] = "_MAYBE_EXPAND_METHOD_KW", [_NOP] = "_NOP", + [_POP_CALL] = "_POP_CALL", + [_POP_CALL_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_ONE] = "_POP_CALL_ONE", + [_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_ONE_LOAD_CONST_INLINE_BORROW", + [_POP_CALL_TWO] = "_POP_CALL_TWO", + [_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_CALL_TWO_LOAD_CONST_INLINE_BORROW", [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE] = 
"_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", @@ -642,9 +688,7 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_FAST_BORROW_LOAD_FAST_BORROW: return 0; - case _LOAD_CONST_MORTAL: - return 0; - case _LOAD_CONST_IMMORTAL: + case _LOAD_CONST: return 0; case _LOAD_SMALL_INT_0: return 0; @@ -680,6 +724,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TWO: + return 2; case _PUSH_NULL: return 0; case _END_FOR: @@ -700,6 +746,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _GUARD_TOS_LIST: return 0; + case _GUARD_TOS_SLICE: + return 0; case _TO_BOOL_LIST: return 1; case _TO_BOOL_NONE: @@ -748,6 +796,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 4; case _BINARY_OP_SUBSCR_LIST_INT: return 2; + case _BINARY_OP_SUBSCR_LIST_SLICE: + return 2; case _BINARY_OP_SUBSCR_STR_INT: return 2; case _GUARD_NOS_TUPLE: @@ -852,6 +902,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _BUILD_STRING: return oparg; + case _BUILD_INTERPOLATION: + return 2 + (oparg & 1); + case _BUILD_TEMPLATE: + return 2; case _BUILD_TUPLE: return oparg; case _BUILD_LIST: @@ -970,8 +1024,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _FOR_ITER_GEN_FRAME: return 0; - case _LOAD_SPECIAL: + case _INSERT_NULL: return 1; + case _LOAD_SPECIAL: + return 0; case _WITH_EXCEPT_START: return 0; case _PUSH_EXC_INFO: @@ -1018,6 +1074,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _CHECK_STACK_SPACE: return 0; + case _CHECK_RECURSION_REMAINING: + return 0; case _INIT_CALL_PY_EXACT_ARGS_0: return 2 + oparg; case _INIT_CALL_PY_EXACT_ARGS_1: @@ -1032,10 +1090,22 @@ int _PyUop_num_popped(int opcode, int oparg) return 2 + oparg; case _PUSH_FRAME: return 1; + 
case _GUARD_NOS_NULL: + return 0; + case _GUARD_NOS_NOT_NULL: + return 0; + case _GUARD_THIRD_NULL: + return 0; + case _GUARD_CALLABLE_TYPE_1: + return 0; case _CALL_TYPE_1: return 3; + case _GUARD_CALLABLE_STR_1: + return 0; case _CALL_STR_1: return 3; + case _GUARD_CALLABLE_TUPLE_1: + return 0; case _CALL_TUPLE_1: return 3; case _CHECK_AND_ALLOCATE_OBJECT: @@ -1052,10 +1122,16 @@ int _PyUop_num_popped(int opcode, int oparg) return 2 + oparg; case _CALL_BUILTIN_FAST_WITH_KEYWORDS: return 2 + oparg; + case _GUARD_CALLABLE_LEN: + return 0; case _CALL_LEN: - return 2 + oparg; + return 3; + case _GUARD_CALLABLE_ISINSTANCE: + return 0; case _CALL_ISINSTANCE: - return 2 + oparg; + return 4; + case _GUARD_CALLABLE_LIST_APPEND: + return 0; case _CALL_LIST_APPEND: return 3; case _CALL_METHOD_DESCRIPTOR_O: @@ -1067,7 +1143,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _CALL_METHOD_DESCRIPTOR_FAST: return 2 + oparg; case _MAYBE_EXPAND_METHOD_KW: - return 1; + return 0; case _PY_FRAME_KW: return 3 + oparg; case _CHECK_FUNCTION_VERSION_KW: @@ -1081,7 +1157,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _CALL_KW_NON_PY: return 3 + oparg; case _MAKE_CALLARGS_A_TUPLE: - return 2; + return 0; case _MAKE_FUNCTION: return 1; case _SET_FUNCTION_ATTRIBUTE: @@ -1128,10 +1204,26 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_CONST_INLINE_BORROW: return 0; + case _POP_CALL: + return 2; + case _POP_CALL_ONE: + return 3; + case _POP_CALL_TWO: + return 4; case _POP_TOP_LOAD_CONST_INLINE_BORROW: return 1; case _POP_TWO_LOAD_CONST_INLINE_BORROW: return 2; + case _POP_CALL_LOAD_CONST_INLINE_BORROW: + return 2; + case _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW: + return 3; + case _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW: + return 4; + case _LOAD_CONST_UNDER_INLINE: + return 1; + case _LOAD_CONST_UNDER_INLINE_BORROW: + return 1; case _CHECK_FUNCTION: return 0; case _START_EXECUTOR: diff --git a/Include/longobject.h b/Include/longobject.h index 
45c0d218c13..19f06977036 100644 --- a/Include/longobject.h +++ b/Include/longobject.h @@ -40,6 +40,58 @@ PyAPI_FUNC(int) PyLong_AsInt32(PyObject *obj, int32_t *value); PyAPI_FUNC(int) PyLong_AsUInt32(PyObject *obj, uint32_t *value); PyAPI_FUNC(int) PyLong_AsInt64(PyObject *obj, int64_t *value); PyAPI_FUNC(int) PyLong_AsUInt64(PyObject *obj, uint64_t *value); + +#define Py_ASNATIVEBYTES_DEFAULTS -1 +#define Py_ASNATIVEBYTES_BIG_ENDIAN 0 +#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1 +#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3 +#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4 +#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8 +#define Py_ASNATIVEBYTES_ALLOW_INDEX 16 + +/* PyLong_AsNativeBytes: Copy the integer value to a native variable. + buffer points to the first byte of the variable. + n_bytes is the number of bytes available in the buffer. Pass 0 to request + the required size for the value. + flags is a bitfield of the following flags: + * 1 - little endian + * 2 - native endian + * 4 - unsigned destination (e.g. don't reject copying 255 into one byte) + * 8 - raise an exception for negative inputs + * 16 - call __index__ on non-int types + If flags is -1 (all bits set), native endian is used, value truncation + behaves most like C (allows negative inputs and allow MSB set), and non-int + objects will raise a TypeError. + Big endian mode will write the most significant byte into the address + directly referenced by buffer; little endian will write the least significant + byte into that address. + + If an exception is raised, returns a negative value. + Otherwise, returns the number of bytes that are required to store the value. + To check that the full value is represented, ensure that the return value is + equal or less than n_bytes. + All n_bytes are guaranteed to be written (unless an exception occurs), and + so ignoring a positive return value is the equivalent of a downcast in C. 
+ In cases where the full value could not be represented, the returned value + may be larger than necessary - this function is not an accurate way to + calculate the bit length of an integer object. + */ +PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, + Py_ssize_t n_bytes, int flags); + +/* PyLong_FromNativeBytes: Create an int value from a native integer + n_bytes is the number of bytes to read from the buffer. Passing 0 will + always produce the zero int. + PyLong_FromUnsignedNativeBytes always produces a non-negative int. + flags is the same as for PyLong_AsNativeBytes, but only supports selecting + the endianness or forcing an unsigned buffer. + + Returns the int object, or NULL with an exception set. */ +PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, + int flags); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, + size_t n_bytes, int flags); + #endif PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); diff --git a/Include/methodobject.h b/Include/methodobject.h index cfff05f8033..e6ec6421d1e 100644 --- a/Include/methodobject.h +++ b/Include/methodobject.h @@ -33,7 +33,7 @@ typedef PyObject *(*PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, typedef PyCFunctionFast _PyCFunctionFast; typedef PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords; -// Cast an function to the PyCFunction type to use it with PyMethodDef. +// Cast a function to the PyCFunction type to use it with PyMethodDef. // // This macro can be used to prevent compiler warnings if the first parameter // uses a different pointer type than PyObject* (ex: METH_VARARGS and METH_O @@ -49,8 +49,17 @@ typedef PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords; // used to prevent a compiler warning. If the function has a single parameter, // it triggers an undefined behavior when Python calls it with 2 parameters // (bpo-33012). 
-#define _PyCFunction_CAST(func) \ - _Py_CAST(PyCFunction, _Py_CAST(void(*)(void), (func))) +#define _PyCFunction_CAST(func) \ + _Py_FUNC_CAST(PyCFunction, func) +// The macros below are given for semantic convenience, allowing users +// to see whether a cast to suppress an undefined behavior is necessary. +// Note: At runtime, the original function signature must be respected. +#define _PyCFunctionFast_CAST(func) \ + _Py_FUNC_CAST(PyCFunctionFast, func) +#define _PyCFunctionWithKeywords_CAST(func) \ + _Py_FUNC_CAST(PyCFunctionWithKeywords, func) +#define _PyCFunctionFastWithKeywords_CAST(func) \ + _Py_FUNC_CAST(PyCFunctionFastWithKeywords, func) PyAPI_FUNC(PyCFunction) PyCFunction_GetFunction(PyObject *); PyAPI_FUNC(PyObject *) PyCFunction_GetSelf(PyObject *); diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index 898dc580f41..1d5c74adefc 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -12,123 +12,125 @@ extern "C" { /* Instruction opcodes for compiled code */ #define CACHE 0 #define BINARY_SLICE 1 -#define CALL_FUNCTION_EX 2 +#define BUILD_TEMPLATE 2 #define BINARY_OP_INPLACE_ADD_UNICODE 3 -#define CHECK_EG_MATCH 4 -#define CHECK_EXC_MATCH 5 -#define CLEANUP_THROW 6 -#define DELETE_SUBSCR 7 -#define END_FOR 8 -#define END_SEND 9 -#define EXIT_INIT_CHECK 10 -#define FORMAT_SIMPLE 11 -#define FORMAT_WITH_SPEC 12 -#define GET_AITER 13 -#define GET_ANEXT 14 -#define GET_ITER 15 -#define GET_LEN 16 +#define CALL_FUNCTION_EX 4 +#define CHECK_EG_MATCH 5 +#define CHECK_EXC_MATCH 6 +#define CLEANUP_THROW 7 +#define DELETE_SUBSCR 8 +#define END_FOR 9 +#define END_SEND 10 +#define EXIT_INIT_CHECK 11 +#define FORMAT_SIMPLE 12 +#define FORMAT_WITH_SPEC 13 +#define GET_AITER 14 +#define GET_ANEXT 15 +#define GET_ITER 16 #define RESERVED 17 -#define GET_YIELD_FROM_ITER 18 -#define INTERPRETER_EXIT 19 -#define LOAD_BUILD_CLASS 20 -#define LOAD_LOCALS 21 -#define MAKE_FUNCTION 22 -#define MATCH_KEYS 23 -#define MATCH_MAPPING 24 -#define MATCH_SEQUENCE 25 
-#define NOP 26 -#define NOT_TAKEN 27 -#define POP_EXCEPT 28 -#define POP_ITER 29 -#define POP_TOP 30 -#define PUSH_EXC_INFO 31 -#define PUSH_NULL 32 -#define RETURN_GENERATOR 33 -#define RETURN_VALUE 34 -#define SETUP_ANNOTATIONS 35 -#define STORE_SLICE 36 -#define STORE_SUBSCR 37 -#define TO_BOOL 38 -#define UNARY_INVERT 39 -#define UNARY_NEGATIVE 40 -#define UNARY_NOT 41 -#define WITH_EXCEPT_START 42 -#define BINARY_OP 43 -#define BUILD_LIST 44 -#define BUILD_MAP 45 -#define BUILD_SET 46 -#define BUILD_SLICE 47 -#define BUILD_STRING 48 -#define BUILD_TUPLE 49 -#define CALL 50 -#define CALL_INTRINSIC_1 51 -#define CALL_INTRINSIC_2 52 -#define CALL_KW 53 -#define COMPARE_OP 54 -#define CONTAINS_OP 55 -#define CONVERT_VALUE 56 -#define COPY 57 -#define COPY_FREE_VARS 58 -#define DELETE_ATTR 59 -#define DELETE_DEREF 60 -#define DELETE_FAST 61 -#define DELETE_GLOBAL 62 -#define DELETE_NAME 63 -#define DICT_MERGE 64 -#define DICT_UPDATE 65 -#define END_ASYNC_FOR 66 -#define EXTENDED_ARG 67 -#define FOR_ITER 68 -#define GET_AWAITABLE 69 -#define IMPORT_FROM 70 -#define IMPORT_NAME 71 -#define IS_OP 72 -#define JUMP_BACKWARD 73 -#define JUMP_BACKWARD_NO_INTERRUPT 74 -#define JUMP_FORWARD 75 -#define LIST_APPEND 76 -#define LIST_EXTEND 77 -#define LOAD_ATTR 78 -#define LOAD_COMMON_CONSTANT 79 -#define LOAD_CONST 80 -#define LOAD_DEREF 81 -#define LOAD_FAST 82 -#define LOAD_FAST_AND_CLEAR 83 -#define LOAD_FAST_BORROW 84 -#define LOAD_FAST_BORROW_LOAD_FAST_BORROW 85 -#define LOAD_FAST_CHECK 86 -#define LOAD_FAST_LOAD_FAST 87 -#define LOAD_FROM_DICT_OR_DEREF 88 -#define LOAD_FROM_DICT_OR_GLOBALS 89 -#define LOAD_GLOBAL 90 -#define LOAD_NAME 91 -#define LOAD_SMALL_INT 92 -#define LOAD_SPECIAL 93 -#define LOAD_SUPER_ATTR 94 -#define MAKE_CELL 95 -#define MAP_ADD 96 -#define MATCH_CLASS 97 -#define POP_JUMP_IF_FALSE 98 -#define POP_JUMP_IF_NONE 99 -#define POP_JUMP_IF_NOT_NONE 100 -#define POP_JUMP_IF_TRUE 101 -#define RAISE_VARARGS 102 -#define RERAISE 103 -#define SEND 104 
-#define SET_ADD 105 -#define SET_FUNCTION_ATTRIBUTE 106 -#define SET_UPDATE 107 -#define STORE_ATTR 108 -#define STORE_DEREF 109 -#define STORE_FAST 110 -#define STORE_FAST_LOAD_FAST 111 -#define STORE_FAST_STORE_FAST 112 -#define STORE_GLOBAL 113 -#define STORE_NAME 114 -#define SWAP 115 -#define UNPACK_EX 116 -#define UNPACK_SEQUENCE 117 -#define YIELD_VALUE 118 +#define GET_LEN 18 +#define GET_YIELD_FROM_ITER 19 +#define INTERPRETER_EXIT 20 +#define LOAD_BUILD_CLASS 21 +#define LOAD_LOCALS 22 +#define MAKE_FUNCTION 23 +#define MATCH_KEYS 24 +#define MATCH_MAPPING 25 +#define MATCH_SEQUENCE 26 +#define NOP 27 +#define NOT_TAKEN 28 +#define POP_EXCEPT 29 +#define POP_ITER 30 +#define POP_TOP 31 +#define PUSH_EXC_INFO 32 +#define PUSH_NULL 33 +#define RETURN_GENERATOR 34 +#define RETURN_VALUE 35 +#define SETUP_ANNOTATIONS 36 +#define STORE_SLICE 37 +#define STORE_SUBSCR 38 +#define TO_BOOL 39 +#define UNARY_INVERT 40 +#define UNARY_NEGATIVE 41 +#define UNARY_NOT 42 +#define WITH_EXCEPT_START 43 +#define BINARY_OP 44 +#define BUILD_INTERPOLATION 45 +#define BUILD_LIST 46 +#define BUILD_MAP 47 +#define BUILD_SET 48 +#define BUILD_SLICE 49 +#define BUILD_STRING 50 +#define BUILD_TUPLE 51 +#define CALL 52 +#define CALL_INTRINSIC_1 53 +#define CALL_INTRINSIC_2 54 +#define CALL_KW 55 +#define COMPARE_OP 56 +#define CONTAINS_OP 57 +#define CONVERT_VALUE 58 +#define COPY 59 +#define COPY_FREE_VARS 60 +#define DELETE_ATTR 61 +#define DELETE_DEREF 62 +#define DELETE_FAST 63 +#define DELETE_GLOBAL 64 +#define DELETE_NAME 65 +#define DICT_MERGE 66 +#define DICT_UPDATE 67 +#define END_ASYNC_FOR 68 +#define EXTENDED_ARG 69 +#define FOR_ITER 70 +#define GET_AWAITABLE 71 +#define IMPORT_FROM 72 +#define IMPORT_NAME 73 +#define IS_OP 74 +#define JUMP_BACKWARD 75 +#define JUMP_BACKWARD_NO_INTERRUPT 76 +#define JUMP_FORWARD 77 +#define LIST_APPEND 78 +#define LIST_EXTEND 79 +#define LOAD_ATTR 80 +#define LOAD_COMMON_CONSTANT 81 +#define LOAD_CONST 82 +#define LOAD_DEREF 83 +#define 
LOAD_FAST 84 +#define LOAD_FAST_AND_CLEAR 85 +#define LOAD_FAST_BORROW 86 +#define LOAD_FAST_BORROW_LOAD_FAST_BORROW 87 +#define LOAD_FAST_CHECK 88 +#define LOAD_FAST_LOAD_FAST 89 +#define LOAD_FROM_DICT_OR_DEREF 90 +#define LOAD_FROM_DICT_OR_GLOBALS 91 +#define LOAD_GLOBAL 92 +#define LOAD_NAME 93 +#define LOAD_SMALL_INT 94 +#define LOAD_SPECIAL 95 +#define LOAD_SUPER_ATTR 96 +#define MAKE_CELL 97 +#define MAP_ADD 98 +#define MATCH_CLASS 99 +#define POP_JUMP_IF_FALSE 100 +#define POP_JUMP_IF_NONE 101 +#define POP_JUMP_IF_NOT_NONE 102 +#define POP_JUMP_IF_TRUE 103 +#define RAISE_VARARGS 104 +#define RERAISE 105 +#define SEND 106 +#define SET_ADD 107 +#define SET_FUNCTION_ATTRIBUTE 108 +#define SET_UPDATE 109 +#define STORE_ATTR 110 +#define STORE_DEREF 111 +#define STORE_FAST 112 +#define STORE_FAST_LOAD_FAST 113 +#define STORE_FAST_STORE_FAST 114 +#define STORE_GLOBAL 115 +#define STORE_NAME 116 +#define SWAP 117 +#define UNPACK_EX 118 +#define UNPACK_SEQUENCE 119 +#define YIELD_VALUE 120 #define RESUME 128 #define BINARY_OP_ADD_FLOAT 129 #define BINARY_OP_ADD_INT 130 @@ -139,79 +141,78 @@ extern "C" { #define BINARY_OP_SUBSCR_DICT 135 #define BINARY_OP_SUBSCR_GETITEM 136 #define BINARY_OP_SUBSCR_LIST_INT 137 -#define BINARY_OP_SUBSCR_STR_INT 138 -#define BINARY_OP_SUBSCR_TUPLE_INT 139 -#define BINARY_OP_SUBTRACT_FLOAT 140 -#define BINARY_OP_SUBTRACT_INT 141 -#define CALL_ALLOC_AND_ENTER_INIT 142 -#define CALL_BOUND_METHOD_EXACT_ARGS 143 -#define CALL_BOUND_METHOD_GENERAL 144 -#define CALL_BUILTIN_CLASS 145 -#define CALL_BUILTIN_FAST 146 -#define CALL_BUILTIN_FAST_WITH_KEYWORDS 147 -#define CALL_BUILTIN_O 148 -#define CALL_ISINSTANCE 149 -#define CALL_KW_BOUND_METHOD 150 -#define CALL_KW_NON_PY 151 -#define CALL_KW_PY 152 -#define CALL_LEN 153 -#define CALL_LIST_APPEND 154 -#define CALL_METHOD_DESCRIPTOR_FAST 155 -#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 156 -#define CALL_METHOD_DESCRIPTOR_NOARGS 157 -#define CALL_METHOD_DESCRIPTOR_O 158 -#define 
CALL_NON_PY_GENERAL 159 -#define CALL_PY_EXACT_ARGS 160 -#define CALL_PY_GENERAL 161 -#define CALL_STR_1 162 -#define CALL_TUPLE_1 163 -#define CALL_TYPE_1 164 -#define COMPARE_OP_FLOAT 165 -#define COMPARE_OP_INT 166 -#define COMPARE_OP_STR 167 -#define CONTAINS_OP_DICT 168 -#define CONTAINS_OP_SET 169 -#define FOR_ITER_GEN 170 -#define FOR_ITER_LIST 171 -#define FOR_ITER_RANGE 172 -#define FOR_ITER_TUPLE 173 -#define JUMP_BACKWARD_JIT 174 -#define JUMP_BACKWARD_NO_JIT 175 -#define LOAD_ATTR_CLASS 176 -#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 177 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 178 -#define LOAD_ATTR_INSTANCE_VALUE 179 -#define LOAD_ATTR_METHOD_LAZY_DICT 180 -#define LOAD_ATTR_METHOD_NO_DICT 181 -#define LOAD_ATTR_METHOD_WITH_VALUES 182 -#define LOAD_ATTR_MODULE 183 -#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 184 -#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 185 -#define LOAD_ATTR_PROPERTY 186 -#define LOAD_ATTR_SLOT 187 -#define LOAD_ATTR_WITH_HINT 188 -#define LOAD_CONST_IMMORTAL 189 -#define LOAD_CONST_MORTAL 190 -#define LOAD_GLOBAL_BUILTIN 191 -#define LOAD_GLOBAL_MODULE 192 -#define LOAD_SUPER_ATTR_ATTR 193 -#define LOAD_SUPER_ATTR_METHOD 194 -#define RESUME_CHECK 195 -#define SEND_GEN 196 -#define STORE_ATTR_INSTANCE_VALUE 197 -#define STORE_ATTR_SLOT 198 -#define STORE_ATTR_WITH_HINT 199 -#define STORE_SUBSCR_DICT 200 -#define STORE_SUBSCR_LIST_INT 201 -#define TO_BOOL_ALWAYS_TRUE 202 -#define TO_BOOL_BOOL 203 -#define TO_BOOL_INT 204 -#define TO_BOOL_LIST 205 -#define TO_BOOL_NONE 206 -#define TO_BOOL_STR 207 -#define UNPACK_SEQUENCE_LIST 208 -#define UNPACK_SEQUENCE_TUPLE 209 -#define UNPACK_SEQUENCE_TWO_TUPLE 210 +#define BINARY_OP_SUBSCR_LIST_SLICE 138 +#define BINARY_OP_SUBSCR_STR_INT 139 +#define BINARY_OP_SUBSCR_TUPLE_INT 140 +#define BINARY_OP_SUBTRACT_FLOAT 141 +#define BINARY_OP_SUBTRACT_INT 142 +#define CALL_ALLOC_AND_ENTER_INIT 143 +#define CALL_BOUND_METHOD_EXACT_ARGS 144 +#define CALL_BOUND_METHOD_GENERAL 145 +#define 
CALL_BUILTIN_CLASS 146 +#define CALL_BUILTIN_FAST 147 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 148 +#define CALL_BUILTIN_O 149 +#define CALL_ISINSTANCE 150 +#define CALL_KW_BOUND_METHOD 151 +#define CALL_KW_NON_PY 152 +#define CALL_KW_PY 153 +#define CALL_LEN 154 +#define CALL_LIST_APPEND 155 +#define CALL_METHOD_DESCRIPTOR_FAST 156 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 157 +#define CALL_METHOD_DESCRIPTOR_NOARGS 158 +#define CALL_METHOD_DESCRIPTOR_O 159 +#define CALL_NON_PY_GENERAL 160 +#define CALL_PY_EXACT_ARGS 161 +#define CALL_PY_GENERAL 162 +#define CALL_STR_1 163 +#define CALL_TUPLE_1 164 +#define CALL_TYPE_1 165 +#define COMPARE_OP_FLOAT 166 +#define COMPARE_OP_INT 167 +#define COMPARE_OP_STR 168 +#define CONTAINS_OP_DICT 169 +#define CONTAINS_OP_SET 170 +#define FOR_ITER_GEN 171 +#define FOR_ITER_LIST 172 +#define FOR_ITER_RANGE 173 +#define FOR_ITER_TUPLE 174 +#define JUMP_BACKWARD_JIT 175 +#define JUMP_BACKWARD_NO_JIT 176 +#define LOAD_ATTR_CLASS 177 +#define LOAD_ATTR_CLASS_WITH_METACLASS_CHECK 178 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 179 +#define LOAD_ATTR_INSTANCE_VALUE 180 +#define LOAD_ATTR_METHOD_LAZY_DICT 181 +#define LOAD_ATTR_METHOD_NO_DICT 182 +#define LOAD_ATTR_METHOD_WITH_VALUES 183 +#define LOAD_ATTR_MODULE 184 +#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 185 +#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 186 +#define LOAD_ATTR_PROPERTY 187 +#define LOAD_ATTR_SLOT 188 +#define LOAD_ATTR_WITH_HINT 189 +#define LOAD_GLOBAL_BUILTIN 190 +#define LOAD_GLOBAL_MODULE 191 +#define LOAD_SUPER_ATTR_ATTR 192 +#define LOAD_SUPER_ATTR_METHOD 193 +#define RESUME_CHECK 194 +#define SEND_GEN 195 +#define STORE_ATTR_INSTANCE_VALUE 196 +#define STORE_ATTR_SLOT 197 +#define STORE_ATTR_WITH_HINT 198 +#define STORE_SUBSCR_DICT 199 +#define STORE_SUBSCR_LIST_INT 200 +#define TO_BOOL_ALWAYS_TRUE 201 +#define TO_BOOL_BOOL 202 +#define TO_BOOL_INT 203 +#define TO_BOOL_LIST 204 +#define TO_BOOL_NONE 205 +#define TO_BOOL_STR 206 +#define 
UNPACK_SEQUENCE_LIST 207 +#define UNPACK_SEQUENCE_TUPLE 208 +#define UNPACK_SEQUENCE_TWO_TUPLE 209 #define INSTRUMENTED_END_FOR 234 #define INSTRUMENTED_POP_ITER 235 #define INSTRUMENTED_END_SEND 236 @@ -234,18 +235,19 @@ extern "C" { #define INSTRUMENTED_JUMP_BACKWARD 253 #define INSTRUMENTED_LINE 254 #define ENTER_EXECUTOR 255 -#define JUMP 256 -#define JUMP_IF_FALSE 257 -#define JUMP_IF_TRUE 258 -#define JUMP_NO_INTERRUPT 259 -#define LOAD_CLOSURE 260 -#define POP_BLOCK 261 -#define SETUP_CLEANUP 262 -#define SETUP_FINALLY 263 -#define SETUP_WITH 264 -#define STORE_FAST_MAYBE_NULL 265 +#define ANNOTATIONS_PLACEHOLDER 256 +#define JUMP 257 +#define JUMP_IF_FALSE 258 +#define JUMP_IF_TRUE 259 +#define JUMP_NO_INTERRUPT 260 +#define LOAD_CLOSURE 261 +#define POP_BLOCK 262 +#define SETUP_CLEANUP 263 +#define SETUP_FINALLY 264 +#define SETUP_WITH 265 +#define STORE_FAST_MAYBE_NULL 266 -#define HAVE_ARGUMENT 42 +#define HAVE_ARGUMENT 43 #define MIN_SPECIALIZED_OPCODE 129 #define MIN_INSTRUMENTED_OPCODE 234 diff --git a/Include/patchlevel.h b/Include/patchlevel.h index ed570864f6b..532873b51e6 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -18,13 +18,13 @@ /* Version parsed out into numeric values */ /*--start constants--*/ #define PY_MAJOR_VERSION 3 -#define PY_MINOR_VERSION 14 +#define PY_MINOR_VERSION 15 #define PY_MICRO_VERSION 0 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_ALPHA -#define PY_RELEASE_SERIAL 7 +#define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.14.0a7+" +#define PY_VERSION "3.15.0a0" /*--end constants--*/ diff --git a/Include/py_curses.h b/Include/py_curses.h index e11bfedb17d..0948aabedd4 100644 --- a/Include/py_curses.h +++ b/Include/py_curses.h @@ -75,10 +75,11 @@ extern "C" { /* Type declarations */ -typedef struct { +typedef struct PyCursesWindowObject { PyObject_HEAD WINDOW *win; char *encoding; + struct PyCursesWindowObject *orig; } PyCursesWindowObject; #define PyCurses_CAPSULE_NAME "_curses._C_API" @@ 
-108,6 +109,13 @@ static void **PyCurses_API; static const char catchall_ERR[] = "curses function returned ERR"; static const char catchall_NULL[] = "curses function returned NULL"; +#if defined(CURSES_MODULE) || defined(CURSES_PANEL_MODULE) +/* Error messages shared by the curses package */ +# define CURSES_ERROR_FORMAT "%s() returned %s" +# define CURSES_ERROR_VERBOSE_FORMAT "%s() (called by %s()) returned %s" +# define CURSES_ERROR_MUST_CALL_FORMAT "must call %s() first" +#endif + #ifdef __cplusplus } #endif diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h index de1bcb1d2cb..4b3474035ce 100644 --- a/Include/pylifecycle.h +++ b/Include/pylifecycle.h @@ -35,15 +35,8 @@ PyAPI_FUNC(int) Py_BytesMain(int argc, char **argv); /* In pathconfig.c */ Py_DEPRECATED(3.11) PyAPI_FUNC(void) Py_SetProgramName(const wchar_t *); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetProgramName(void); - Py_DEPRECATED(3.11) PyAPI_FUNC(void) Py_SetPythonHome(const wchar_t *); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPythonHome(void); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPrefix(void); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetExecPrefix(void); -Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPath(void); #ifdef MS_WINDOWS int _Py_CheckPython3(void); #endif diff --git a/Include/pymacro.h b/Include/pymacro.h index a82f347866e..218987a80b0 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -23,6 +23,47 @@ # define static_assert _Static_assert #endif + +// _Py_ALIGN_AS: this compiler's spelling of `alignas` keyword, +// We currently use alignas for free-threaded builds only; additional compat +// checking would be great before we add it to the default build. +// Standards/compiler support: +// - `alignas` is a keyword in C23 and C++11. 
+// - `_Alignas` is a keyword in C11 +// - GCC & clang has __attribute__((aligned)) +// (use that for older standards in pedantic mode) +// - MSVC has __declspec(align) +// - `_Alignas` is common C compiler extension +// Older compilers may name it differently; to allow compilation on such +// unsupported platforms, we don't redefine _Py_ALIGN_AS if it's already +// defined. Note that defining it wrong (including defining it to nothing) will +// cause ABI incompatibilities. +#ifdef Py_GIL_DISABLED +# ifndef _Py_ALIGN_AS +# ifdef __cplusplus +# if __cplusplus >= 201103L +# define _Py_ALIGN_AS(V) alignas(V) +# elif defined(__GNUC__) || defined(__clang__) +# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) +# elif defined(_MSC_VER) +# define _Py_ALIGN_AS(V) __declspec(align(V)) +# else +# define _Py_ALIGN_AS(V) alignas(V) +# endif +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +# define _Py_ALIGN_AS(V) alignas(V) +# elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +# define _Py_ALIGN_AS(V) _Alignas(V) +# elif (defined(__GNUC__) || defined(__clang__)) +# define _Py_ALIGN_AS(V) __attribute__((aligned(V))) +# elif defined(_MSC_VER) +# define _Py_ALIGN_AS(V) __declspec(align(V)) +# else +# define _Py_ALIGN_AS(V) _Alignas(V) +# endif +# endif +#endif + /* Minimum value between x and y */ #define Py_MIN(x, y) (((x) > (y)) ? (y) : (x)) diff --git a/Include/pyport.h b/Include/pyport.h index 2a7192c2c55..3eac119bf8e 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -36,6 +36,16 @@ // Macro to use the more powerful/dangerous C-style cast even in C++. #define _Py_CAST(type, expr) ((type)(expr)) +// Cast a function to another function type T. +// +// The macro first casts the function to the "void func(void)" type +// to prevent compiler warnings. 
+// +// Note that using this cast only prevents the compiler from emitting +// warnings, but does not prevent an undefined behavior at runtime if +// the original function signature is not respected. +#define _Py_FUNC_CAST(T, func) _Py_CAST(T, _Py_CAST(void(*)(void), (func))) + // Static inline functions should use _Py_NULL rather than using directly NULL // to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer, // _Py_NULL is defined as nullptr. @@ -675,4 +685,19 @@ extern "C" { #endif +// _Py_NONSTRING: The nonstring variable attribute specifies that an object or +// member declaration with type array of char, signed char, or unsigned char, +// or pointer to such a type is intended to store character arrays that do not +// necessarily contain a terminating NUL. +// +// Usage: +// +// char name [8] _Py_NONSTRING; +#if _Py__has_attribute(nonstring) +# define _Py_NONSTRING __attribute__((nonstring)) +#else +# define _Py_NONSTRING +#endif + + #endif /* Py_PYPORT_H */ diff --git a/Include/pythonrun.h b/Include/pythonrun.h index 4d459cb92e3..716c4ab3c81 100644 --- a/Include/pythonrun.h +++ b/Include/pythonrun.h @@ -26,17 +26,22 @@ PyAPI_DATA(int) (*PyOS_InputHook)(void); * apart. 
In practice, that means it must be larger than the C * stack consumption of PyEval_EvalDefault */ #if defined(_Py_ADDRESS_SANITIZER) || defined(_Py_THREAD_SANITIZER) -# define PYOS_STACK_MARGIN 4096 +# define PYOS_LOG2_STACK_MARGIN 12 #elif defined(Py_DEBUG) && defined(WIN32) -# define PYOS_STACK_MARGIN 4096 -#elif defined(__wasi__) - /* Web assembly has two stacks, so this isn't really a size */ -# define PYOS_STACK_MARGIN 500 +# define PYOS_LOG2_STACK_MARGIN 12 #else -# define PYOS_STACK_MARGIN 2048 +# define PYOS_LOG2_STACK_MARGIN 11 #endif +#define PYOS_STACK_MARGIN (1 << PYOS_LOG2_STACK_MARGIN) #define PYOS_STACK_MARGIN_BYTES (PYOS_STACK_MARGIN * sizeof(void *)) +#if SIZEOF_VOID_P == 8 +#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 3) +#else +#define PYOS_STACK_MARGIN_SHIFT (PYOS_LOG2_STACK_MARGIN + 2) +#endif + + #if defined(WIN32) #define USE_STACKCHECK #endif diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 2ce3a008b71..b72d581ec25 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -341,46 +341,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_Decode( const char *errors /* error handling */ ); -/* Decode a Unicode object unicode and return the result as Python - object. - - This API is DEPRECATED. The only supported standard encoding is rot13. - Use PyCodec_Decode() to decode with rot13 and non-standard codecs - that decode from str. */ - -Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Decode a Unicode object unicode and return the result as Unicode - object. - - This API is DEPRECATED. The only supported standard encoding is rot13. - Use PyCodec_Decode() to decode with rot13 and non-standard codecs - that decode from str to str. 
*/ - -Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a Unicode object and returns the result as Python - object. - - This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() - since all standard encodings (except rot13) encode str to bytes. - Use PyCodec_Encode() for encoding with rot13 and non-standard codecs - that encode form str to non-bytes. */ - -Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - /* Encodes a Unicode object and returns the result as Python string object. */ @@ -390,19 +350,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( const char *errors /* error handling */ ); -/* Encodes a Unicode object and returns the result as Unicode - object. - - This API is DEPRECATED. The only supported standard encodings is rot13. - Use PyCodec_Encode() to encode with rot13 and non-standard codecs - that encode from str to str. */ - -Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - /* Build an encoding map. */ PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 8ca19a42b91..02bbdf6071f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -505,8 +505,8 @@ * [Python/ast.c](../Python/ast.c): Used for validating the AST. - * [Python/ast_opt.c](../Python/ast_opt.c): - Optimizes the AST. + * [Python/ast_preprocess.c](../Python/ast_preprocess.c): + Preprocesses the AST before compiling. * [Python/ast_unparse.c](../Python/ast_unparse.c): Converts the AST expression node back into a string (for string annotations). 
diff --git a/InternalDocs/frames.md b/InternalDocs/frames.md index 2f0cc7967f3..804d7436018 100644 --- a/InternalDocs/frames.md +++ b/InternalDocs/frames.md @@ -11,7 +11,7 @@ # Frames previous frame, etc. The definition of the `_PyInterpreterFrame` struct is in -[Include/internal/pycore_frame.h](../Include/internal/pycore_frame.h). +[Include/internal/pycore_interpframe_structs.h](../Include/internal/pycore_interpframe_structs.h). # Allocation @@ -21,8 +21,8 @@ # Allocation (see `_PyThreadState_PushFrame` in [Python/pystate.c](../Python/pystate.c)). Frames of generators and coroutines are embedded in the generator and coroutine -objects, so are not allocated in the per-thread stack. See `PyGenObject` in -[Include/internal/pycore_genobject.h](../Include/internal/pycore_genobject.h). +objects, so are not allocated in the per-thread stack. See `_PyGenObject` in +[Include/internal/pycore_interpframe_structs.h](../Include/internal/pycore_interpframe_structs.h). ## Layout diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index e4cb9e45c9e..4da6cd47dc8 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -417,7 +417,7 @@ transitive closure cannot contain any partial cycles. We can exclude scanned objects, as they must have been reachable when scanned. If a scanned object becomes part of an unreachable cycle after being scanned, it will -not be collected this at this time, but it will be collected in the next full scavenge. +not be collected at this time, but it will be collected in the next full scavenge. > [!NOTE] > The GC implementation for the free-threaded build does not use incremental collection. @@ -477,17 +477,24 @@ ``` -Optimization: visiting reachable objects -======================================== +Optimization: excluding reachable objects +========================================= -An object cannot be garbage if it can be reached. +An object cannot be garbage if it can be reached. 
To avoid having to identify +reference cycles across the whole heap, we can reduce the amount of work done +considerably by first identifying objects reachable from objects known to be +alive. These objects are excluded from the normal cyclic detection process. -To avoid having to identify reference cycles across the whole heap, we can -reduce the amount of work done considerably by first moving most reachable objects -to the `visited` space. Empirically, most reachable objects can be reached from a -small set of global objects and local variables. -This step does much less work per object, so reduces the time spent -performing garbage collection by at least half. +The default and free-threaded build both implement this optimization but in +slightly different ways. + +Finding reachable objects for the default build GC +-------------------------------------------------- + +This works by first moving most reachable objects to the `visited` space. +Empirically, most reachable objects can be reached from a small set of global +objects and local variables. This step does much less work per object, so +reduces the time spent performing garbage collection by at least half. > [!NOTE] > Objects that are not determined to be reachable by this pass are not necessarily @@ -515,6 +522,171 @@ added to the working set. Then the above algorithm is repeated, starting from step 2. + +Finding reachable objects for the free-threaded GC +-------------------------------------------------- + +Within the `gc_free_threading.c` implementation, this is known as the "mark +alive" pass or phase. It is similar in concept to what is done for the default +build GC. Rather than moving objects between double-linked lists, the +free-threaded GC uses a flag in `ob_gc_bits` to track if an object is +found to be definitely alive (not garbage). + +To find objects reachable from known alive objects, known as the "roots", the +`gc_mark_alive_from_roots()` function is used. 
Root objects include +`interp->sysdict` (the `sys` module dictionary), `interp->builtins`, and +`interp->types`. Also included are all objects referred to by active Python +frames. These objects and the transitive closure of objects reachable from +them have `_PyGC_BITS_ALIVE` set. Any object with that bit set is excluded +from the rest of the cyclic garbage detection process, since we know it cannot +be unreachable. + +> [!NOTE] +> If the `gc.freeze()` function has been used, this phase of the collector is +> skipped. That is done for two reasons. First, it is unlikely to be a +> performance win if most of the objects have been marked as frozen. As such, +> they would be excluded for the cyclic garbage detection, without this extra +> work anyhow. Second, one of the purposes of using `gc.freeze()` is to avoid +> dirtying the memory pages holding frozen objects. If this phase was executed, +> the toggling of the `ob_gc_bits` flags would dirty pages and defeat that. + +Software prefetch hinting +------------------------- + +To speed up the "mark alive" phase of the free-threaded GC, an additional +optimization, known as software prefetching, is used. The GC will execute +explicit prefetch CPU instructions in order to reduce the latency due to +loading data from main memory. This added complexity can pay off since main +memory is so much slower compared to accessing data in the CPU cache. This +is enabled only if the number of long-lived objects exceeds a threshold. If +the set of objects being garbage collected is small, the accessed memory is +likely to fit entirely in the CPU cache and software prefetch is not helpful. + +The details of this optimization are intricate, with the source code being the +best reference. However, the rest of this section gives a high level +description of how it works and explains some design decisions. + +Software prefetching is only used during the "mark alive" phase of the GC. 
+Specifically, when we are performing the transitive closure of the "alive" +status of objects (i.e. objects reachable from known alive objects, known as +roots). For each object we find, we need to traverse all objects directly +reachable from that object. If that set of referred objects is in scattered +locations of memory, the hardware prefetch is unlikely to predict the next +accessed memory location. + +Making software prefetch work well hinges on a key principle: allow enough time +between issuing the prefetch instruction for a memory location and actually +accessing that location's data. We can call that time difference the prefetch +window. If the window is too large, we fill up the CPU caches with data that's +not needed yet. Worse, the data might be discarded from the cache before we +actually use it. If the window is too small then the memory system does not +have enough time to finish loading the memory and the CPU will have to wait. +The window is indirectly tuned by the prefetch buffer parameters. The buffer +will be explained next. + +The prefetch buffer is a FIFO queue of fixed size. When we enqueue an object +reference into the buffer, we also issue a software prefetch instruction for +that memory location. When we dequeue an object reference from the buffer, we +assume or hope that enough time has elapsed so that the memory has been loaded +into the cache. This is the mechanism that provides the window. + +When performing the transitive closure of "alive" status, the set of objects +yet to visit are stored in one of two places. First, they can be stored in the +prefetch buffer. Second, there is a LIFO stack, of unlimited size. When object +references are found using `tp_traverse`, they are enqueued in the buffer if +it is not full, otherwise they are pushed to the stack. + +We must take special care not to access the memory referred to by an object +pointer until we take that reference out of the prefetch buffer.
That means we +cannot check if an object is a "container" (if the `PyObject_IS_GC()` test is +true) or if the object already has the "alive" flag set. Both of those tests +would require that the object memory is accessed. There are some additional +elaborations that try to keep the buffer filled to the optimal size but to keep +things simple we will omit those details here. Not discussed in detail are +"spans", which help reduce the amount of stack used and make it easier to +control the size of the buffer. + +As mentioned, the prefetch window is the time delay between the issue of the +prefetch instruction, on buffer enqueue, and the memory access, after buffer +dequeue. It is tuned by adjusting some buffer parameters. If processing time +were equal for every object then the buffer length would be proportional to the +window. Since processing each object can actually take a variable amount of +time, the relation between the buffer length and the prefetch window is only +approximate. However, this proportional relationship is assumed to hold true +and we avoid the overhead of actually measuring object processing times. + +The relevant parameters are the maximum buffer size and the low and high +thresholds for filling. The buffer parameters are set as follows: maximum +length is 256, low threshold is 8, high threshold is 16. These parameters are +used as follows. If the buffer has reached the maximum size, new object +pointers found while following references are pushed to the stack, rather than +put in the buffer. When dequeuing objects from the buffer, we will "prime" the +buffer if the current length drops below the low threshold. Priming means +popping objects from the stack and enqueuing them into the buffer. While +priming, we will fill it only until the high threshold is reached. + +To measure the effectiveness of the buffer, some benchmark programs were run +with a trace log of memory location prefetch and access instructions.
The +prefetch window for each object processed was computed from the trace log. +Each enqueue and dequeue operation was treated as taking one unit of time. +Time to actually process the object was assumed to be zero. A histogram of the +windows is shown below. These traces suggest the buffer length is mostly being +kept between the low and high thresholds, which is what we want. Variations of +the buffer parameters were benchmarked and the best performing parameters were +chosen. Obviously it is unlikely these parameters will be optimal for all +hardware and programs. + +``` +Prefetch window stats +mean 52.1 +median 14.0 +max 256 + 25.60 |65,304 | ****************************** + 51.20 |5,590 | ** + 76.80 |3,562 | * + 102.40 |2,683 | * + 128.00 |2,278 | * + 153.60 |2,285 | * + 179.20 |2,377 | * + 204.80 |2,238 | * + 230.40 |2,753 | * + 256.00 |5,930 | ** +-------- |------- | ------- + N= |95,000 +``` + +Using software prefetch instructions is only a win if the set of objects being +examined by the GC does not fit into CPU caches. Otherwise, using the buffer +and prefetch instructions is just overhead. Using the long-lived object count +seems a good estimate of whether things will fit in the cache. On 64-bit platforms, +the minimum object size is 32 bytes. A 4MB L2 cache would hold about 130,000 +objects. + +The current threshold for enabling prefetch is that the number of long-lived +objects must exceed 200,000. Based on benchmarking, this seems in the range +where prefetch becomes a net gain. Obviously it depends on hardware details +and also the "shape" of the object graph. For example, your object graph may +be constructed by linearly allocating objects in memory. Then, traversing the +object graph might naturally result in mostly ordered memory access. In that +case, the hardware prefetch is likely to do a nearly perfect job without any +software prefetch hints.
+ +This optimization, as of March 2025, was tuned on the following hardware +platforms: + +- Apple M3 Pro, 32 GB RAM, 192+128 KB L1, 16 MB L2, compiled with Clang 19 +- AMD Ryzen 5 7600X, 64 GB RAM, 384 KB L1, 6 MB L2, 32 MB L3, compiled with GCC 12.2.0 + +Benchmarking the effectiveness of this optimization is particularly difficult. +It depends both on hardware details, like CPU cache sizes and memory latencies, +and the specifics of the program's memory access patterns, where objects are +located in memory and in what order they are accessed during the mark alive +phase. When the program's memory access patterns are favourable (working set +of data larger than the CPU cache, objects allocated in such a way that access +order is not linear), the speedup from using software prefetching is in the +range of 20% to 40% for the entire full GC collection. + + Optimization: reusing fields to save memory =========================================== diff --git a/InternalDocs/generators.md b/InternalDocs/generators.md index 87fbb912368..979a5b51521 100644 --- a/InternalDocs/generators.md +++ b/InternalDocs/generators.md @@ -28,9 +28,9 @@ The `frame` of a generator is embedded in the generator object struct as a [`_PyInterpreterFrame`](frames.md) (see `_PyGenObject_HEAD` in -[`pycore_genobject.h`](../Include/internal/pycore_genobject.h)). +[`pycore_interpframe_structs.h`](../Include/internal/pycore_interpframe_structs.h)). This means that we can get the frame from the generator or the generator -from the frame (see `_PyGen_GetGeneratorFromFrame` in the same file). +from the frame (see `_PyGen_GetGeneratorFromFrame` in [`pycore_genobject.h`](../Include/internal/pycore_genobject.h)). Other fields of the generator struct include metadata (such as the name of the generator function) and runtime state information (such as whether its frame is executing, suspended, cleared, etc.).
diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index be47efe2435..1d0ffe6d40d 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -97,7 +97,7 @@ ------------------------------------------- Although PEG may look like EBNF, its meaning is quite different. The fact -that the alternatives are ordered in a PEG grammer (which is at the core of +that the alternatives are ordered in a PEG grammar (which is at the core of how PEG parsers work) has deep consequences, other than removing ambiguity. If a rule has two alternatives and the first of them succeeds, the second one is diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py index 56d9e935dd9..c25066eb107 100644 --- a/Lib/_ast_unparse.py +++ b/Lib/_ast_unparse.py @@ -573,21 +573,11 @@ def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): quote_type = quote_types[0] self.write(f"{quote_type}{string}{quote_type}") - def visit_JoinedStr(self, node): - self.write("f") - - fstring_parts = [] - for value in node.values: - with self.buffered() as buffer: - self._write_fstring_inner(value) - fstring_parts.append( - ("".join(buffer), isinstance(value, Constant)) - ) - - new_fstring_parts = [] + def _ftstring_helper(self, parts): + new_parts = [] quote_types = list(_ALL_QUOTES) fallback_to_repr = False - for value, is_constant in fstring_parts: + for value, is_constant in parts: if is_constant: value, new_quote_types = self._str_literal_helper( value, @@ -606,30 +596,71 @@ def visit_JoinedStr(self, node): new_quote_types = [q for q in quote_types if q not in value] if new_quote_types: quote_types = new_quote_types - new_fstring_parts.append(value) + new_parts.append(value) if fallback_to_repr: # If we weren't able to find a quote type that works for all parts # of the JoinedStr, fallback to using repr and triple single quotes. 
quote_types = ["'''"] - new_fstring_parts.clear() - for value, is_constant in fstring_parts: + new_parts.clear() + for value, is_constant in parts: if is_constant: value = repr('"' + value) # force repr to use single quotes expected_prefix = "'\"" assert value.startswith(expected_prefix), repr(value) value = value[len(expected_prefix):-1] - new_fstring_parts.append(value) + new_parts.append(value) - value = "".join(new_fstring_parts) + value = "".join(new_parts) quote_type = quote_types[0] self.write(f"{quote_type}{value}{quote_type}") - def _write_fstring_inner(self, node, is_format_spec=False): + def _write_ftstring(self, values, prefix): + self.write(prefix) + fstring_parts = [] + for value in values: + with self.buffered() as buffer: + self._write_ftstring_inner(value) + fstring_parts.append( + ("".join(buffer), isinstance(value, Constant)) + ) + self._ftstring_helper(fstring_parts) + + def _tstring_helper(self, node): + if not node.values: + self._write_ftstring([], "t") + return + last_idx = 0 + for i, value in enumerate(node.values): + # This can happen if we have an implicit concat of a t-string + # with an f-string + if isinstance(value, FormattedValue): + if i > last_idx: + # Write t-string until here + self._write_ftstring(node.values[last_idx:i], "t") + self.write(" ") + # Write f-string with the current formatted value + self._write_ftstring([node.values[i]], "f") + if i + 1 < len(node.values): + # Only add a space if there are more values after this + self.write(" ") + last_idx = i + 1 + + if last_idx < len(node.values): + # Write t-string from last_idx to end + self._write_ftstring(node.values[last_idx:], "t") + + def visit_JoinedStr(self, node): + self._write_ftstring(node.values, "f") + + def visit_TemplateStr(self, node): + self._tstring_helper(node) + + def _write_ftstring_inner(self, node, is_format_spec=False): if isinstance(node, JoinedStr): # for both the f-string itself, and format_spec for value in node.values: - 
self._write_fstring_inner(value, is_format_spec=is_format_spec) + self._write_ftstring_inner(value, is_format_spec=is_format_spec) elif isinstance(node, Constant) and isinstance(node.value, str): value = node.value.replace("{", "{{").replace("}", "}}") @@ -641,17 +672,22 @@ def _write_fstring_inner(self, node, is_format_spec=False): self.write(value) elif isinstance(node, FormattedValue): self.visit_FormattedValue(node) + elif isinstance(node, Interpolation): + self.visit_Interpolation(node) else: raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") - def visit_FormattedValue(self, node): - def unparse_inner(inner): - unparser = type(self)() - unparser.set_precedence(_Precedence.TEST.next(), inner) - return unparser.visit(inner) + def _unparse_interpolation_value(self, inner): + unparser = type(self)() + unparser.set_precedence(_Precedence.TEST.next(), inner) + return unparser.visit(inner) + def _write_interpolation(self, node, is_interpolation=False): with self.delimit("{", "}"): - expr = unparse_inner(node.value) + if is_interpolation: + expr = node.str + else: + expr = self._unparse_interpolation_value(node.value) if expr.startswith("{"): # Separate pair of opening brackets as "{ {" self.write(" ") @@ -660,7 +696,13 @@ def unparse_inner(inner): self.write(f"!{chr(node.conversion)}") if node.format_spec: self.write(":") - self._write_fstring_inner(node.format_spec, is_format_spec=True) + self._write_ftstring_inner(node.format_spec, is_format_spec=True) + + def visit_FormattedValue(self, node): + self._write_interpolation(node) + + def visit_Interpolation(self, node): + self._write_interpolation(node, is_interpolation=True) def visit_Name(self, node): self.write(node.id) diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index 06667b7434c..51263d696a1 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -485,10 +485,10 @@ def __new__(cls, origin, args): def __repr__(self): if len(self.__args__) == 2 and 
_is_param_expr(self.__args__[0]): return super().__repr__() - from annotationlib import value_to_string + from annotationlib import type_repr return (f'collections.abc.Callable' - f'[[{", ".join([value_to_string(a) for a in self.__args__[:-1]])}], ' - f'{value_to_string(self.__args__[-1])}]') + f'[[{", ".join([type_repr(a) for a in self.__args__[:-1]])}], ' + f'{type_repr(self.__args__[-1])}]') def __reduce__(self): args = self.__args__ diff --git a/Lib/_colorize.py b/Lib/_colorize.py index 9eb6f0933b8..4a310a40235 100644 --- a/Lib/_colorize.py +++ b/Lib/_colorize.py @@ -1,13 +1,17 @@ -from __future__ import annotations import io import os import sys +from collections.abc import Callable, Iterator, Mapping +from dataclasses import dataclass, field, Field + COLORIZE = True + # types if False: - from typing import IO + from typing import IO, Self, ClassVar + _theme: Theme class ANSIColors: @@ -17,11 +21,13 @@ class ANSIColors: BLUE = "\x1b[34m" CYAN = "\x1b[36m" GREEN = "\x1b[32m" + GREY = "\x1b[90m" MAGENTA = "\x1b[35m" RED = "\x1b[31m" WHITE = "\x1b[37m" # more like LIGHT GRAY YELLOW = "\x1b[33m" + BOLD = "\x1b[1m" BOLD_BLACK = "\x1b[1;30m" # DARK GRAY BOLD_BLUE = "\x1b[1;34m" BOLD_CYAN = "\x1b[1;36m" @@ -60,13 +66,195 @@ class ANSIColors: INTENSE_BACKGROUND_YELLOW = "\x1b[103m" +ColorCodes = set() NoColors = ANSIColors() -for attr in dir(NoColors): +for attr, code in ANSIColors.__dict__.items(): if not attr.startswith("__"): + ColorCodes.add(code) setattr(NoColors, attr, "") +# +# Experimental theming support (see gh-133346) +# + +# - Create a theme by copying an existing `Theme` with one or more sections +# replaced, using `default_theme.copy_with()`; +# - create a theme section by copying an existing `ThemeSection` with one or +# more colors replaced, using for example `default_theme.syntax.copy_with()`; +# - create a theme from scratch by instantiating a `Theme` data class with +# the required sections (which are also dataclass instances). 
+# +# Then call `_colorize.set_theme(your_theme)` to set it. +# +# Put your theme configuration in $PYTHONSTARTUP for the interactive shell, +# or sitecustomize.py in your virtual environment or Python installation for +# other uses. Your applications can call `_colorize.set_theme()` too. +# +# Note that thanks to the dataclasses providing default values for all fields, +# creating a new theme or theme section from scratch is possible without +# specifying all keys. +# +# For example, here's a theme that makes punctuation and operators less prominent: +# +# try: +# from _colorize import set_theme, default_theme, Syntax, ANSIColors +# except ImportError: +# pass +# else: +# theme_with_dim_operators = default_theme.copy_with( +# syntax=Syntax(op=ANSIColors.INTENSE_BLACK), +# ) +# set_theme(theme_with_dim_operators) +# del set_theme, default_theme, Syntax, ANSIColors, theme_with_dim_operators +# +# Guarding the import ensures that your .pythonstartup file will still work in +# Python 3.13 and older. Deleting the variables ensures they don't remain in your +# interactive shell's global scope. + +class ThemeSection(Mapping[str, str]): + """A mixin/base class for theme sections. + + It enables dictionary access to a section, as well as implements convenience + methods. 
+ """ + + # The two types below are just that: types to inform the type checker that the + # mixin will work in context of those fields existing + __dataclass_fields__: ClassVar[dict[str, Field[str]]] + _name_to_value: Callable[[str], str] + + def __post_init__(self) -> None: + name_to_value = {} + for color_name in self.__dataclass_fields__: + name_to_value[color_name] = getattr(self, color_name) + super().__setattr__('_name_to_value', name_to_value.__getitem__) + + def copy_with(self, **kwargs: str) -> Self: + color_state: dict[str, str] = {} + for color_name in self.__dataclass_fields__: + color_state[color_name] = getattr(self, color_name) + color_state.update(kwargs) + return type(self)(**color_state) + + @classmethod + def no_colors(cls) -> Self: + color_state: dict[str, str] = {} + for color_name in cls.__dataclass_fields__: + color_state[color_name] = "" + return cls(**color_state) + + def __getitem__(self, key: str) -> str: + return self._name_to_value(key) + + def __len__(self) -> int: + return len(self.__dataclass_fields__) + + def __iter__(self) -> Iterator[str]: + return iter(self.__dataclass_fields__) + + +@dataclass(frozen=True) +class Argparse(ThemeSection): + usage: str = ANSIColors.BOLD_BLUE + prog: str = ANSIColors.BOLD_MAGENTA + prog_extra: str = ANSIColors.MAGENTA + heading: str = ANSIColors.BOLD_BLUE + summary_long_option: str = ANSIColors.CYAN + summary_short_option: str = ANSIColors.GREEN + summary_label: str = ANSIColors.YELLOW + summary_action: str = ANSIColors.GREEN + long_option: str = ANSIColors.BOLD_CYAN + short_option: str = ANSIColors.BOLD_GREEN + label: str = ANSIColors.BOLD_YELLOW + action: str = ANSIColors.BOLD_GREEN + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Syntax(ThemeSection): + prompt: str = ANSIColors.BOLD_MAGENTA + keyword: str = ANSIColors.BOLD_BLUE + builtin: str = ANSIColors.CYAN + comment: str = ANSIColors.RED + string: str = ANSIColors.GREEN + number: str = ANSIColors.YELLOW + op: str = 
ANSIColors.RESET + definition: str = ANSIColors.BOLD + soft_keyword: str = ANSIColors.BOLD_BLUE + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Traceback(ThemeSection): + type: str = ANSIColors.BOLD_MAGENTA + message: str = ANSIColors.MAGENTA + filename: str = ANSIColors.MAGENTA + line_no: str = ANSIColors.MAGENTA + frame: str = ANSIColors.MAGENTA + error_highlight: str = ANSIColors.BOLD_RED + error_range: str = ANSIColors.RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Unittest(ThemeSection): + passed: str = ANSIColors.GREEN + warn: str = ANSIColors.YELLOW + fail: str = ANSIColors.RED + fail_info: str = ANSIColors.BOLD_RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Theme: + """A suite of themes for all sections of Python. + + When adding a new one, remember to also modify `copy_with` and `no_colors` + below. + """ + argparse: Argparse = field(default_factory=Argparse) + syntax: Syntax = field(default_factory=Syntax) + traceback: Traceback = field(default_factory=Traceback) + unittest: Unittest = field(default_factory=Unittest) + + def copy_with( + self, + *, + argparse: Argparse | None = None, + syntax: Syntax | None = None, + traceback: Traceback | None = None, + unittest: Unittest | None = None, + ) -> Self: + """Return a new Theme based on this instance with some sections replaced. + + Themes are immutable to protect against accidental modifications that + could lead to invalid terminal states. + """ + return type(self)( + argparse=argparse or self.argparse, + syntax=syntax or self.syntax, + traceback=traceback or self.traceback, + unittest=unittest or self.unittest, + ) + + @classmethod + def no_colors(cls) -> Self: + """Return a new Theme where colors in all sections are empty strings. + + This allows writing user code as if colors are always used. The color + fields will be ANSI color code strings when colorization is desired + and possible, and empty strings otherwise. 
+ """ + return cls( + argparse=Argparse.no_colors(), + syntax=Syntax.no_colors(), + traceback=Traceback.no_colors(), + unittest=Unittest.no_colors(), + ) + + def get_colors( colorize: bool = False, *, file: IO[str] | IO[bytes] | None = None ) -> ANSIColors: @@ -76,6 +264,13 @@ def get_colors( return NoColors +def decolor(text: str) -> str: + """Remove ANSI color codes from a string.""" + for code in ColorCodes: + text = text.replace(code, "") + return text + + def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: if file is None: file = sys.stdout @@ -110,3 +305,42 @@ def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: return os.isatty(file.fileno()) except io.UnsupportedOperation: return hasattr(file, "isatty") and file.isatty() + + +default_theme = Theme() +theme_no_color = default_theme.no_colors() + + +def get_theme( + *, + tty_file: IO[str] | IO[bytes] | None = None, + force_color: bool = False, + force_no_color: bool = False, +) -> Theme: + """Returns the currently set theme, potentially in a zero-color variant. + + In cases where colorizing is not possible (see `can_colorize`), the returned + theme contains all empty strings in all color definitions. + See `Theme.no_colors()` for more information. + + It is recommended not to cache the result of this function for extended + periods of time because the user might influence theme selection by + the interactive shell, a debugger, or application-specific code. The + environment (including environment variable state and console configuration + on Windows) can also change in the course of the application life cycle. 
+ """ + if force_color or (not force_no_color and can_colorize(file=tty_file)): + return _theme + return theme_no_color + + +def set_theme(t: Theme) -> None: + global _theme + + if not isinstance(t, Theme): + raise ValueError(f"Expected Theme object, found {t}") + + _theme = t + + +set_theme(default_theme) diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py index 439f8c02f4b..a9813264324 100644 --- a/Lib/_compat_pickle.py +++ b/Lib/_compat_pickle.py @@ -175,7 +175,6 @@ 'SimpleDialog': 'tkinter.simpledialog', 'DocXMLRPCServer': 'xmlrpc.server', 'SimpleHTTPServer': 'http.server', - 'CGIHTTPServer': 'http.server', # For compatibility with broken pickles saved in old Python 3 versions 'UserDict': 'collections', 'UserList': 'collections', @@ -217,8 +216,6 @@ ('DocXMLRPCServer', 'DocCGIXMLRPCRequestHandler'), ('http.server', 'SimpleHTTPRequestHandler'): ('SimpleHTTPServer', 'SimpleHTTPRequestHandler'), - ('http.server', 'CGIHTTPRequestHandler'): - ('CGIHTTPServer', 'CGIHTTPRequestHandler'), ('_socket', 'socket'): ('socket', '_socketobject'), }) diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py index 15900265a01..f168d169a32 100644 --- a/Lib/_opcode_metadata.py +++ b/Lib/_opcode_metadata.py @@ -6,10 +6,6 @@ _specializations = { "RESUME": [ "RESUME_CHECK", ], - "LOAD_CONST": [ - "LOAD_CONST_MORTAL", - "LOAD_CONST_IMMORTAL", - ], "TO_BOOL": [ "TO_BOOL_ALWAYS_TRUE", "TO_BOOL_BOOL", @@ -27,6 +23,7 @@ _specializations = { "BINARY_OP_SUBTRACT_FLOAT", "BINARY_OP_ADD_UNICODE", "BINARY_OP_SUBSCR_LIST_INT", + "BINARY_OP_SUBSCR_LIST_SLICE", "BINARY_OP_SUBSCR_TUPLE_INT", "BINARY_OP_SUBSCR_STR_INT", "BINARY_OP_SUBSCR_DICT", @@ -133,79 +130,78 @@ _specialized_opmap = { 'BINARY_OP_SUBSCR_DICT': 135, 'BINARY_OP_SUBSCR_GETITEM': 136, 'BINARY_OP_SUBSCR_LIST_INT': 137, - 'BINARY_OP_SUBSCR_STR_INT': 138, - 'BINARY_OP_SUBSCR_TUPLE_INT': 139, - 'BINARY_OP_SUBTRACT_FLOAT': 140, - 'BINARY_OP_SUBTRACT_INT': 141, - 'CALL_ALLOC_AND_ENTER_INIT': 142, - 
'CALL_BOUND_METHOD_EXACT_ARGS': 143, - 'CALL_BOUND_METHOD_GENERAL': 144, - 'CALL_BUILTIN_CLASS': 145, - 'CALL_BUILTIN_FAST': 146, - 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 147, - 'CALL_BUILTIN_O': 148, - 'CALL_ISINSTANCE': 149, - 'CALL_KW_BOUND_METHOD': 150, - 'CALL_KW_NON_PY': 151, - 'CALL_KW_PY': 152, - 'CALL_LEN': 153, - 'CALL_LIST_APPEND': 154, - 'CALL_METHOD_DESCRIPTOR_FAST': 155, - 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 156, - 'CALL_METHOD_DESCRIPTOR_NOARGS': 157, - 'CALL_METHOD_DESCRIPTOR_O': 158, - 'CALL_NON_PY_GENERAL': 159, - 'CALL_PY_EXACT_ARGS': 160, - 'CALL_PY_GENERAL': 161, - 'CALL_STR_1': 162, - 'CALL_TUPLE_1': 163, - 'CALL_TYPE_1': 164, - 'COMPARE_OP_FLOAT': 165, - 'COMPARE_OP_INT': 166, - 'COMPARE_OP_STR': 167, - 'CONTAINS_OP_DICT': 168, - 'CONTAINS_OP_SET': 169, - 'FOR_ITER_GEN': 170, - 'FOR_ITER_LIST': 171, - 'FOR_ITER_RANGE': 172, - 'FOR_ITER_TUPLE': 173, - 'JUMP_BACKWARD_JIT': 174, - 'JUMP_BACKWARD_NO_JIT': 175, - 'LOAD_ATTR_CLASS': 176, - 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 177, - 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 178, - 'LOAD_ATTR_INSTANCE_VALUE': 179, - 'LOAD_ATTR_METHOD_LAZY_DICT': 180, - 'LOAD_ATTR_METHOD_NO_DICT': 181, - 'LOAD_ATTR_METHOD_WITH_VALUES': 182, - 'LOAD_ATTR_MODULE': 183, - 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 184, - 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 185, - 'LOAD_ATTR_PROPERTY': 186, - 'LOAD_ATTR_SLOT': 187, - 'LOAD_ATTR_WITH_HINT': 188, - 'LOAD_CONST_IMMORTAL': 189, - 'LOAD_CONST_MORTAL': 190, - 'LOAD_GLOBAL_BUILTIN': 191, - 'LOAD_GLOBAL_MODULE': 192, - 'LOAD_SUPER_ATTR_ATTR': 193, - 'LOAD_SUPER_ATTR_METHOD': 194, - 'RESUME_CHECK': 195, - 'SEND_GEN': 196, - 'STORE_ATTR_INSTANCE_VALUE': 197, - 'STORE_ATTR_SLOT': 198, - 'STORE_ATTR_WITH_HINT': 199, - 'STORE_SUBSCR_DICT': 200, - 'STORE_SUBSCR_LIST_INT': 201, - 'TO_BOOL_ALWAYS_TRUE': 202, - 'TO_BOOL_BOOL': 203, - 'TO_BOOL_INT': 204, - 'TO_BOOL_LIST': 205, - 'TO_BOOL_NONE': 206, - 'TO_BOOL_STR': 207, - 'UNPACK_SEQUENCE_LIST': 208, - 'UNPACK_SEQUENCE_TUPLE': 209, - 
'UNPACK_SEQUENCE_TWO_TUPLE': 210, + 'BINARY_OP_SUBSCR_LIST_SLICE': 138, + 'BINARY_OP_SUBSCR_STR_INT': 139, + 'BINARY_OP_SUBSCR_TUPLE_INT': 140, + 'BINARY_OP_SUBTRACT_FLOAT': 141, + 'BINARY_OP_SUBTRACT_INT': 142, + 'CALL_ALLOC_AND_ENTER_INIT': 143, + 'CALL_BOUND_METHOD_EXACT_ARGS': 144, + 'CALL_BOUND_METHOD_GENERAL': 145, + 'CALL_BUILTIN_CLASS': 146, + 'CALL_BUILTIN_FAST': 147, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, + 'CALL_BUILTIN_O': 149, + 'CALL_ISINSTANCE': 150, + 'CALL_KW_BOUND_METHOD': 151, + 'CALL_KW_NON_PY': 152, + 'CALL_KW_PY': 153, + 'CALL_LEN': 154, + 'CALL_LIST_APPEND': 155, + 'CALL_METHOD_DESCRIPTOR_FAST': 156, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, + 'CALL_METHOD_DESCRIPTOR_O': 159, + 'CALL_NON_PY_GENERAL': 160, + 'CALL_PY_EXACT_ARGS': 161, + 'CALL_PY_GENERAL': 162, + 'CALL_STR_1': 163, + 'CALL_TUPLE_1': 164, + 'CALL_TYPE_1': 165, + 'COMPARE_OP_FLOAT': 166, + 'COMPARE_OP_INT': 167, + 'COMPARE_OP_STR': 168, + 'CONTAINS_OP_DICT': 169, + 'CONTAINS_OP_SET': 170, + 'FOR_ITER_GEN': 171, + 'FOR_ITER_LIST': 172, + 'FOR_ITER_RANGE': 173, + 'FOR_ITER_TUPLE': 174, + 'JUMP_BACKWARD_JIT': 175, + 'JUMP_BACKWARD_NO_JIT': 176, + 'LOAD_ATTR_CLASS': 177, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, + 'LOAD_ATTR_INSTANCE_VALUE': 180, + 'LOAD_ATTR_METHOD_LAZY_DICT': 181, + 'LOAD_ATTR_METHOD_NO_DICT': 182, + 'LOAD_ATTR_METHOD_WITH_VALUES': 183, + 'LOAD_ATTR_MODULE': 184, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, + 'LOAD_ATTR_PROPERTY': 187, + 'LOAD_ATTR_SLOT': 188, + 'LOAD_ATTR_WITH_HINT': 189, + 'LOAD_GLOBAL_BUILTIN': 190, + 'LOAD_GLOBAL_MODULE': 191, + 'LOAD_SUPER_ATTR_ATTR': 192, + 'LOAD_SUPER_ATTR_METHOD': 193, + 'RESUME_CHECK': 194, + 'SEND_GEN': 195, + 'STORE_ATTR_INSTANCE_VALUE': 196, + 'STORE_ATTR_SLOT': 197, + 'STORE_ATTR_WITH_HINT': 198, + 'STORE_SUBSCR_DICT': 199, + 'STORE_SUBSCR_LIST_INT': 200, + 
'TO_BOOL_ALWAYS_TRUE': 201, + 'TO_BOOL_BOOL': 202, + 'TO_BOOL_INT': 203, + 'TO_BOOL_LIST': 204, + 'TO_BOOL_NONE': 205, + 'TO_BOOL_STR': 206, + 'UNPACK_SEQUENCE_LIST': 207, + 'UNPACK_SEQUENCE_TUPLE': 208, + 'UNPACK_SEQUENCE_TWO_TUPLE': 209, } opmap = { @@ -215,121 +211,123 @@ opmap = { 'INSTRUMENTED_LINE': 254, 'ENTER_EXECUTOR': 255, 'BINARY_SLICE': 1, - 'CALL_FUNCTION_EX': 2, - 'CHECK_EG_MATCH': 4, - 'CHECK_EXC_MATCH': 5, - 'CLEANUP_THROW': 6, - 'DELETE_SUBSCR': 7, - 'END_FOR': 8, - 'END_SEND': 9, - 'EXIT_INIT_CHECK': 10, - 'FORMAT_SIMPLE': 11, - 'FORMAT_WITH_SPEC': 12, - 'GET_AITER': 13, - 'GET_ANEXT': 14, - 'GET_ITER': 15, - 'GET_LEN': 16, - 'GET_YIELD_FROM_ITER': 18, - 'INTERPRETER_EXIT': 19, - 'LOAD_BUILD_CLASS': 20, - 'LOAD_LOCALS': 21, - 'MAKE_FUNCTION': 22, - 'MATCH_KEYS': 23, - 'MATCH_MAPPING': 24, - 'MATCH_SEQUENCE': 25, - 'NOP': 26, - 'NOT_TAKEN': 27, - 'POP_EXCEPT': 28, - 'POP_ITER': 29, - 'POP_TOP': 30, - 'PUSH_EXC_INFO': 31, - 'PUSH_NULL': 32, - 'RETURN_GENERATOR': 33, - 'RETURN_VALUE': 34, - 'SETUP_ANNOTATIONS': 35, - 'STORE_SLICE': 36, - 'STORE_SUBSCR': 37, - 'TO_BOOL': 38, - 'UNARY_INVERT': 39, - 'UNARY_NEGATIVE': 40, - 'UNARY_NOT': 41, - 'WITH_EXCEPT_START': 42, - 'BINARY_OP': 43, - 'BUILD_LIST': 44, - 'BUILD_MAP': 45, - 'BUILD_SET': 46, - 'BUILD_SLICE': 47, - 'BUILD_STRING': 48, - 'BUILD_TUPLE': 49, - 'CALL': 50, - 'CALL_INTRINSIC_1': 51, - 'CALL_INTRINSIC_2': 52, - 'CALL_KW': 53, - 'COMPARE_OP': 54, - 'CONTAINS_OP': 55, - 'CONVERT_VALUE': 56, - 'COPY': 57, - 'COPY_FREE_VARS': 58, - 'DELETE_ATTR': 59, - 'DELETE_DEREF': 60, - 'DELETE_FAST': 61, - 'DELETE_GLOBAL': 62, - 'DELETE_NAME': 63, - 'DICT_MERGE': 64, - 'DICT_UPDATE': 65, - 'END_ASYNC_FOR': 66, - 'EXTENDED_ARG': 67, - 'FOR_ITER': 68, - 'GET_AWAITABLE': 69, - 'IMPORT_FROM': 70, - 'IMPORT_NAME': 71, - 'IS_OP': 72, - 'JUMP_BACKWARD': 73, - 'JUMP_BACKWARD_NO_INTERRUPT': 74, - 'JUMP_FORWARD': 75, - 'LIST_APPEND': 76, - 'LIST_EXTEND': 77, - 'LOAD_ATTR': 78, - 'LOAD_COMMON_CONSTANT': 79, - 
'LOAD_CONST': 80, - 'LOAD_DEREF': 81, - 'LOAD_FAST': 82, - 'LOAD_FAST_AND_CLEAR': 83, - 'LOAD_FAST_BORROW': 84, - 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 85, - 'LOAD_FAST_CHECK': 86, - 'LOAD_FAST_LOAD_FAST': 87, - 'LOAD_FROM_DICT_OR_DEREF': 88, - 'LOAD_FROM_DICT_OR_GLOBALS': 89, - 'LOAD_GLOBAL': 90, - 'LOAD_NAME': 91, - 'LOAD_SMALL_INT': 92, - 'LOAD_SPECIAL': 93, - 'LOAD_SUPER_ATTR': 94, - 'MAKE_CELL': 95, - 'MAP_ADD': 96, - 'MATCH_CLASS': 97, - 'POP_JUMP_IF_FALSE': 98, - 'POP_JUMP_IF_NONE': 99, - 'POP_JUMP_IF_NOT_NONE': 100, - 'POP_JUMP_IF_TRUE': 101, - 'RAISE_VARARGS': 102, - 'RERAISE': 103, - 'SEND': 104, - 'SET_ADD': 105, - 'SET_FUNCTION_ATTRIBUTE': 106, - 'SET_UPDATE': 107, - 'STORE_ATTR': 108, - 'STORE_DEREF': 109, - 'STORE_FAST': 110, - 'STORE_FAST_LOAD_FAST': 111, - 'STORE_FAST_STORE_FAST': 112, - 'STORE_GLOBAL': 113, - 'STORE_NAME': 114, - 'SWAP': 115, - 'UNPACK_EX': 116, - 'UNPACK_SEQUENCE': 117, - 'YIELD_VALUE': 118, + 'BUILD_TEMPLATE': 2, + 'CALL_FUNCTION_EX': 4, + 'CHECK_EG_MATCH': 5, + 'CHECK_EXC_MATCH': 6, + 'CLEANUP_THROW': 7, + 'DELETE_SUBSCR': 8, + 'END_FOR': 9, + 'END_SEND': 10, + 'EXIT_INIT_CHECK': 11, + 'FORMAT_SIMPLE': 12, + 'FORMAT_WITH_SPEC': 13, + 'GET_AITER': 14, + 'GET_ANEXT': 15, + 'GET_ITER': 16, + 'GET_LEN': 18, + 'GET_YIELD_FROM_ITER': 19, + 'INTERPRETER_EXIT': 20, + 'LOAD_BUILD_CLASS': 21, + 'LOAD_LOCALS': 22, + 'MAKE_FUNCTION': 23, + 'MATCH_KEYS': 24, + 'MATCH_MAPPING': 25, + 'MATCH_SEQUENCE': 26, + 'NOP': 27, + 'NOT_TAKEN': 28, + 'POP_EXCEPT': 29, + 'POP_ITER': 30, + 'POP_TOP': 31, + 'PUSH_EXC_INFO': 32, + 'PUSH_NULL': 33, + 'RETURN_GENERATOR': 34, + 'RETURN_VALUE': 35, + 'SETUP_ANNOTATIONS': 36, + 'STORE_SLICE': 37, + 'STORE_SUBSCR': 38, + 'TO_BOOL': 39, + 'UNARY_INVERT': 40, + 'UNARY_NEGATIVE': 41, + 'UNARY_NOT': 42, + 'WITH_EXCEPT_START': 43, + 'BINARY_OP': 44, + 'BUILD_INTERPOLATION': 45, + 'BUILD_LIST': 46, + 'BUILD_MAP': 47, + 'BUILD_SET': 48, + 'BUILD_SLICE': 49, + 'BUILD_STRING': 50, + 'BUILD_TUPLE': 51, + 'CALL': 52, + 
'CALL_INTRINSIC_1': 53, + 'CALL_INTRINSIC_2': 54, + 'CALL_KW': 55, + 'COMPARE_OP': 56, + 'CONTAINS_OP': 57, + 'CONVERT_VALUE': 58, + 'COPY': 59, + 'COPY_FREE_VARS': 60, + 'DELETE_ATTR': 61, + 'DELETE_DEREF': 62, + 'DELETE_FAST': 63, + 'DELETE_GLOBAL': 64, + 'DELETE_NAME': 65, + 'DICT_MERGE': 66, + 'DICT_UPDATE': 67, + 'END_ASYNC_FOR': 68, + 'EXTENDED_ARG': 69, + 'FOR_ITER': 70, + 'GET_AWAITABLE': 71, + 'IMPORT_FROM': 72, + 'IMPORT_NAME': 73, + 'IS_OP': 74, + 'JUMP_BACKWARD': 75, + 'JUMP_BACKWARD_NO_INTERRUPT': 76, + 'JUMP_FORWARD': 77, + 'LIST_APPEND': 78, + 'LIST_EXTEND': 79, + 'LOAD_ATTR': 80, + 'LOAD_COMMON_CONSTANT': 81, + 'LOAD_CONST': 82, + 'LOAD_DEREF': 83, + 'LOAD_FAST': 84, + 'LOAD_FAST_AND_CLEAR': 85, + 'LOAD_FAST_BORROW': 86, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 87, + 'LOAD_FAST_CHECK': 88, + 'LOAD_FAST_LOAD_FAST': 89, + 'LOAD_FROM_DICT_OR_DEREF': 90, + 'LOAD_FROM_DICT_OR_GLOBALS': 91, + 'LOAD_GLOBAL': 92, + 'LOAD_NAME': 93, + 'LOAD_SMALL_INT': 94, + 'LOAD_SPECIAL': 95, + 'LOAD_SUPER_ATTR': 96, + 'MAKE_CELL': 97, + 'MAP_ADD': 98, + 'MATCH_CLASS': 99, + 'POP_JUMP_IF_FALSE': 100, + 'POP_JUMP_IF_NONE': 101, + 'POP_JUMP_IF_NOT_NONE': 102, + 'POP_JUMP_IF_TRUE': 103, + 'RAISE_VARARGS': 104, + 'RERAISE': 105, + 'SEND': 106, + 'SET_ADD': 107, + 'SET_FUNCTION_ATTRIBUTE': 108, + 'SET_UPDATE': 109, + 'STORE_ATTR': 110, + 'STORE_DEREF': 111, + 'STORE_FAST': 112, + 'STORE_FAST_LOAD_FAST': 113, + 'STORE_FAST_STORE_FAST': 114, + 'STORE_GLOBAL': 115, + 'STORE_NAME': 116, + 'SWAP': 117, + 'UNPACK_EX': 118, + 'UNPACK_SEQUENCE': 119, + 'YIELD_VALUE': 120, 'INSTRUMENTED_END_FOR': 234, 'INSTRUMENTED_POP_ITER': 235, 'INSTRUMENTED_END_SEND': 236, @@ -350,17 +348,18 @@ opmap = { 'INSTRUMENTED_CALL_KW': 251, 'INSTRUMENTED_CALL_FUNCTION_EX': 252, 'INSTRUMENTED_JUMP_BACKWARD': 253, - 'JUMP': 256, - 'JUMP_IF_FALSE': 257, - 'JUMP_IF_TRUE': 258, - 'JUMP_NO_INTERRUPT': 259, - 'LOAD_CLOSURE': 260, - 'POP_BLOCK': 261, - 'SETUP_CLEANUP': 262, - 'SETUP_FINALLY': 263, - 'SETUP_WITH': 
264, - 'STORE_FAST_MAYBE_NULL': 265, + 'ANNOTATIONS_PLACEHOLDER': 256, + 'JUMP': 257, + 'JUMP_IF_FALSE': 258, + 'JUMP_IF_TRUE': 259, + 'JUMP_NO_INTERRUPT': 260, + 'LOAD_CLOSURE': 261, + 'POP_BLOCK': 262, + 'SETUP_CLEANUP': 263, + 'SETUP_FINALLY': 264, + 'SETUP_WITH': 265, + 'STORE_FAST_MAYBE_NULL': 266, } -HAVE_ARGUMENT = 42 +HAVE_ARGUMENT = 43 MIN_INSTRUMENTED_OPCODE = 234 diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py index 3cdc6ffe198..cbaa9445862 100644 --- a/Lib/_py_warnings.py +++ b/Lib/_py_warnings.py @@ -371,7 +371,7 @@ def _setoption(arg): if message: message = re.escape(message) if module: - module = re.escape(module) + r'\Z' + module = re.escape(module) + r'\z' if lineno: try: lineno = int(lineno) diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index 50e21a12335..71f619024e5 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -467,6 +467,7 @@ def _parse_isoformat_time(tstr): hour, minute, second, microsecond = time_comps became_next_day = False error_from_components = False + error_from_tz = None if (hour == 24): if all(time_comp == 0 for time_comp in time_comps[1:]): hour = 0 @@ -500,14 +501,22 @@ def _parse_isoformat_time(tstr): else: tzsign = -1 if tstr[tz_pos - 1] == '-' else 1 - td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], - seconds=tz_comps[2], microseconds=tz_comps[3]) - - tzi = timezone(tzsign * td) + try: + # This function is intended to validate datetimes, but because + # we restrict time zones to ±24h, it serves here as well. 
+ _check_time_fields(hour=tz_comps[0], minute=tz_comps[1], + second=tz_comps[2], microsecond=tz_comps[3], + fold=0) + except ValueError as e: + error_from_tz = e + else: + td = timedelta(hours=tz_comps[0], minutes=tz_comps[1], + seconds=tz_comps[2], microseconds=tz_comps[3]) + tzi = timezone(tzsign * td) time_comps.append(tzi) - return time_comps, became_next_day, error_from_components + return time_comps, became_next_day, error_from_components, error_from_tz # tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar def _isoweek_to_gregorian(year, week, day): @@ -1050,8 +1059,12 @@ def fromordinal(cls, n): @classmethod def fromisoformat(cls, date_string): """Construct a date from a string in ISO 8601 format.""" + if not isinstance(date_string, str): - raise TypeError('fromisoformat: argument must be str') + raise TypeError('Argument must be a str') + + if not date_string.isascii(): + raise ValueError('Argument must be an ASCII str') if len(date_string) not in (7, 8, 10): raise ValueError(f'Invalid isoformat string: {date_string!r}') @@ -1123,8 +1136,8 @@ def isoformat(self): This is 'YYYY-MM-DD'. References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html """ return "%04d-%02d-%02d" % (self._year, self._month, self._day) @@ -1258,7 +1271,7 @@ def isocalendar(self): The first week is 1; Monday is 1 ... Sunday is 7. 
ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm (used with permission) """ year = self._year @@ -1629,9 +1642,21 @@ def fromisoformat(cls, time_string): time_string = time_string.removeprefix('T') try: - return cls(*_parse_isoformat_time(time_string)[0]) - except Exception: - raise ValueError(f'Invalid isoformat string: {time_string!r}') + time_components, _, error_from_components, error_from_tz = ( + _parse_isoformat_time(time_string) + ) + except ValueError: + raise ValueError( + f'Invalid isoformat string: {time_string!r}') from None + else: + if error_from_tz: + raise error_from_tz + if error_from_components: + raise ValueError( + "Minute, second, and microsecond must be 0 when hour is 24" + ) + + return cls(*time_components) def strftime(self, format): """Format using strftime(). The date part of the timestamp passed @@ -1943,11 +1968,16 @@ def fromisoformat(cls, date_string): if tstr: try: - time_components, became_next_day, error_from_components = _parse_isoformat_time(tstr) + (time_components, + became_next_day, + error_from_components, + error_from_tz) = _parse_isoformat_time(tstr) except ValueError: raise ValueError( f'Invalid isoformat string: {date_string!r}') from None else: + if error_from_tz: + raise error_from_tz if error_from_components: raise ValueError("minute, second, and microsecond must be 0 when hour is 24") @@ -2085,7 +2115,6 @@ def _local_timezone(self): else: ts = (self - _EPOCH) // timedelta(seconds=1) localtm = _time.localtime(ts) - local = datetime(*localtm[:6]) # Extract TZ data gmtoff = localtm.tm_gmtoff zone = localtm.tm_zone diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index ec036199331..46fa9ffcb1e 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -38,10 +38,10 @@ 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP', # Functions for manipulating contexts - 'setcontext', 'getcontext', 'localcontext', + 'setcontext', 
'getcontext', 'localcontext', 'IEEEContext', # Limits for the C version for compatibility - 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', + 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', 'IEEE_CONTEXT_MAX_BITS', # C version: compile time choice that enables the thread local context (deprecated, now always true) 'HAVE_THREADS', @@ -83,10 +83,12 @@ MAX_PREC = 999999999999999999 MAX_EMAX = 999999999999999999 MIN_EMIN = -999999999999999999 + IEEE_CONTEXT_MAX_BITS = 512 else: MAX_PREC = 425000000 MAX_EMAX = 425000000 MIN_EMIN = -425000000 + IEEE_CONTEXT_MAX_BITS = 256 MIN_ETINY = MIN_EMIN - (MAX_PREC-1) @@ -417,6 +419,27 @@ def sin(x): return ctx_manager +def IEEEContext(bits, /): + """ + Return a context object initialized to the proper values for one of the + IEEE interchange formats. The argument must be a multiple of 32 and less + than IEEE_CONTEXT_MAX_BITS. + """ + if bits <= 0 or bits > IEEE_CONTEXT_MAX_BITS or bits % 32: + raise ValueError("argument must be a multiple of 32, " + f"with a maximum of {IEEE_CONTEXT_MAX_BITS}") + + ctx = Context() + ctx.prec = 9 * (bits//32) - 2 + ctx.Emax = 3 * (1 << (bits//16 + 3)) + ctx.Emin = 1 - ctx.Emax + ctx.rounding = ROUND_HALF_EVEN + ctx.clamp = 1 + ctx.traps = dict.fromkeys(_signals, False) + + return ctx + + ##### Decimal class ####################################################### # Do not subclass Decimal from numbers.Real and do not register it as such @@ -2440,12 +2463,12 @@ def __pow__(self, other, modulo=None, context=None): return ans - def __rpow__(self, other, context=None): + def __rpow__(self, other, modulo=None, context=None): """Swaps self/other and returns __pow__.""" other = _convert_other(other) if other is NotImplemented: return other - return other.__pow__(self, context=context) + return other.__pow__(self, modulo, context=context) def normalize(self, context=None): """Normalize- strip trailing 0s, change anything equal to 0 to 0e0""" @@ -6073,7 +6096,7 @@ def _convert_for_comparison(self, other, 
equality_op=False): (?P\d*) # with (possibly empty) diagnostic info. ) # \s* - \Z + \z """, re.VERBOSE | re.IGNORECASE).match _all_zeros = re.compile('0*$').match @@ -6098,10 +6121,10 @@ def _convert_for_comparison(self, other, equality_op=False): (?P\#)? (?P0)? (?P(?!0)\d+)? -(?P,)? +(?P[,_])? (?:\.(?P0|(?!0)\d+))? (?P[eEfFgGn%])? -\Z +\z """, re.VERBOSE|re.DOTALL) del re diff --git a/Lib/_pyio.py b/Lib/_pyio.py index b875103bee4..fb2a6d049ca 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -407,6 +407,9 @@ def __del__(self): if closed: return + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + # If close() fails, the caller logs the exception with # sys.unraisablehook. close() must be called at the end at __del__(). self.close() @@ -645,8 +648,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -853,6 +854,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -1563,7 +1568,8 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. 
+ raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) @@ -1600,12 +1606,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1780,7 +1785,7 @@ def close(self): if not self.closed: self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -2056,8 +2061,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: @@ -2690,6 +2694,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/_pyrepl/__main__.py b/Lib/_pyrepl/__main__.py index 3fa992eee8e..9c66812e13a 100644 --- a/Lib/_pyrepl/__main__.py +++ b/Lib/_pyrepl/__main__.py @@ -1,6 +1,10 @@ # Important: don't add things to this module, as they will end up in the REPL's # default globals. Use _pyrepl.main instead. +# Avoid caching this file by linecache and incorrectly report tracebacks. +# See https://github.com/python/cpython/issues/129098. 
+__spec__ = __loader__ = None + if __name__ == "__main__": from .main import interactive_console as __pyrepl_interactive_console __pyrepl_interactive_console() diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py new file mode 100644 index 00000000000..494a501101a --- /dev/null +++ b/Lib/_pyrepl/_module_completer.py @@ -0,0 +1,388 @@ +from __future__ import annotations + +import pkgutil +import sys +import token +import tokenize +from io import StringIO +from contextlib import contextmanager +from dataclasses import dataclass +from itertools import chain +from tokenize import TokenInfo + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Iterable, Iterator, Mapping + + +def make_default_module_completer() -> ModuleCompleter: + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) + + +class ModuleCompleter: + """A completer for Python import statements. + + Examples: + - import + - import foo + - import foo. 
+ - import foo as bar, baz + + - from + - from foo + - from foo import + - from foo import bar + - from foo import (bar as baz, qux + """ + + def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: + self.namespace = namespace or {} + self._global_cache: list[pkgutil.ModuleInfo] = [] + self._curr_sys_path: list[str] = sys.path[:] + + def get_completions(self, line: str) -> list[str]: + """Return the next possible import completions for 'line'.""" + result = ImportParser(line).parse() + if not result: + return [] + try: + return self.complete(*result) + except Exception: + # Some unexpected error occurred, make it look like + # no completions are available + return [] + + def complete(self, from_name: str | None, name: str | None) -> list[str]: + if from_name is None: + # import x.y.z + assert name is not None + path, prefix = self.get_path_and_prefix(name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + if name is None: + # from x.y.z + path, prefix = self.get_path_and_prefix(from_name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + # from x.y import z + return self.find_modules(from_name, name) + + def find_modules(self, path: str, prefix: str) -> list[str]: + """Find all modules under 'path' that start with 'prefix'.""" + modules = self._find_modules(path, prefix) + # Filter out invalid module names + # (for example those containing dashes that cannot be imported with 'import') + return [mod for mod in modules if mod.isidentifier()] + + def _find_modules(self, path: str, prefix: str) -> list[str]: + if not path: + # Top-level import (e.g. 
`import foo`` or `from foo`)` + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] + return sorted(builtin_modules + third_party_modules) + + if path.startswith('.'): + # Convert relative path to absolute path + package = self.namespace.get('__package__', '') + path = self.resolve_relative_name(path, package) # type: ignore[assignment] + if path is None: + return [] + + modules: Iterable[pkgutil.ModuleInfo] = self.global_cache + for segment in path.split('.'): + modules = [mod_info for mod_info in modules + if mod_info.ispkg and mod_info.name == segment] + modules = self.iter_submodules(modules) + return [module.name for module in modules + if self.is_suggestion_match(module.name, prefix)] + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") + + def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: + """Iterate over all submodules of the given parent modules.""" + specs = [info.module_finder.find_spec(info.name, None) + for info in parent_modules if info.ispkg] + search_locations = set(chain.from_iterable( + getattr(spec, 'submodule_search_locations', []) + for spec in specs if spec + )) + return pkgutil.iter_modules(search_locations) + + def get_path_and_prefix(self, dotted_name: str) -> tuple[str, str]: + """ + Split a dotted name into an import path and a + final prefix that is to be completed. + + Examples: + 'foo.bar' -> 'foo', 'bar' + 'foo.' -> 'foo', '' + '.foo' -> '.', 'foo' + """ + if '.' not in dotted_name: + return '', dotted_name + if dotted_name.startswith('.'): + stripped = dotted_name.lstrip('.') + dots = '.' 
* (len(dotted_name) - len(stripped)) + if '.' not in stripped: + return dots, stripped + path, prefix = stripped.rsplit('.', 1) + return dots + path, prefix + path, prefix = dotted_name.rsplit('.', 1) + return path, prefix + + def format_completion(self, path: str, module: str) -> str: + if path == '' or path.endswith('.'): + return f'{path}{module}' + return f'{path}.{module}' + + def resolve_relative_name(self, name: str, package: str) -> str | None: + """Resolve a relative module name to an absolute name. + + Example: resolve_relative_name('.foo', 'bar') -> 'bar.foo' + """ + # taken from importlib._bootstrap + level = 0 + for character in name: + if character != '.': + break + level += 1 + bits = package.rsplit('.', level - 1) + if len(bits) < level: + return None + base = bits[0] + name = name[level:] + return f'{base}.{name}' if name else base + + @property + def global_cache(self) -> list[pkgutil.ModuleInfo]: + """Global module cache""" + if not self._global_cache or self._curr_sys_path != sys.path: + self._curr_sys_path = sys.path[:] + # print('getting packages') + self._global_cache = list(pkgutil.iter_modules()) + return self._global_cache + + +class ImportParser: + """ + Parses incomplete import statements that are + suitable for autocomplete suggestions. + + Examples: + - import foo -> Result(from_name=None, name='foo') + - import foo. -> Result(from_name=None, name='foo.') + - from foo -> Result(from_name='foo', name=None) + - from foo import bar -> Result(from_name='foo', name='bar') + - from .foo import ( -> Result(from_name='.foo', name='') + + Note that the parser works in reverse order, starting from the + last token in the input string. This makes the parser more robust + when parsing multiple statements. 
+ """ + _ignored_tokens = { + token.INDENT, token.DEDENT, token.COMMENT, + token.NL, token.NEWLINE, token.ENDMARKER + } + _keywords = {'import', 'from', 'as'} + + def __init__(self, code: str) -> None: + self.code = code + tokens = [] + try: + for t in tokenize.generate_tokens(StringIO(code).readline): + if t.type not in self._ignored_tokens: + tokens.append(t) + except tokenize.TokenError as e: + if 'unexpected EOF' not in str(e): + # unexpected EOF is fine, since we're parsing an + # incomplete statement, but other errors are not + # because we may not have all the tokens so it's + # safer to bail out + tokens = [] + except SyntaxError: + tokens = [] + self.tokens = TokenQueue(tokens[::-1]) + + def parse(self) -> tuple[str | None, str | None] | None: + if not (res := self._parse()): + return None + return res.from_name, res.name + + def _parse(self) -> Result | None: + with self.tokens.save_state(): + return self.parse_from_import() + with self.tokens.save_state(): + return self.parse_import() + + def parse_import(self) -> Result: + if self.code.rstrip().endswith('import') and self.code.endswith(' '): + return Result(name='') + if self.tokens.peek_string(','): + name = '' + else: + if self.code.endswith(' '): + raise ParseError('parse_import') + name = self.parse_dotted_name() + if name.startswith('.'): + raise ParseError('parse_import') + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_dotted_as_name() + if self.tokens.peek_string('import'): + return Result(name=name) + raise ParseError('parse_import') + + def parse_from_import(self) -> Result: + stripped = self.code.rstrip() + if stripped.endswith('import') and self.code.endswith(' '): + return Result(from_name=self.parse_empty_from_import(), name='') + if stripped.endswith('from') and self.code.endswith(' '): + return Result(from_name='') + if self.tokens.peek_string('(') or self.tokens.peek_string(','): + return Result(from_name=self.parse_empty_from_import(), name='') + if 
self.code.endswith(' '): + raise ParseError('parse_from_import') + name = self.parse_dotted_name() + if '.' in name: + self.tokens.pop_string('from') + return Result(from_name=name) + if self.tokens.peek_string('from'): + return Result(from_name=name) + from_name = self.parse_empty_from_import() + return Result(from_name=from_name, name=name) + + def parse_empty_from_import(self) -> str: + if self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_names() + if self.tokens.peek_string('('): + self.tokens.pop() + self.tokens.pop_string('import') + return self.parse_from() + + def parse_from(self) -> str: + from_name = self.parse_dotted_name() + self.tokens.pop_string('from') + return from_name + + def parse_dotted_as_name(self) -> str: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + with self.tokens.save_state(): + return self.parse_dotted_name() + + def parse_dotted_name(self) -> str: + name = [] + if self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + if not name: + raise ParseError('parse_dotted_name') + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + else: + break + + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + return ''.join(name[::-1]) + + def parse_as_names(self) -> None: + self.parse_as_name() + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_name() + + def parse_as_name(self) -> None: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + self.tokens.pop_name() + + +class ParseError(Exception): + pass + + +@dataclass(frozen=True) +class Result: + from_name: str | None = None + name: str | None = None + 
+ +class TokenQueue: + """Provides helper functions for working with a sequence of tokens.""" + + def __init__(self, tokens: list[TokenInfo]) -> None: + self.tokens: list[TokenInfo] = tokens + self.index: int = 0 + self.stack: list[int] = [] + + @contextmanager + def save_state(self) -> Any: + try: + self.stack.append(self.index) + yield + except ParseError: + self.index = self.stack.pop() + else: + self.stack.pop() + + def __bool__(self) -> bool: + return self.index < len(self.tokens) + + def peek(self) -> TokenInfo | None: + if not self: + return None + return self.tokens[self.index] + + def peek_name(self) -> bool: + if not (tok := self.peek()): + return False + return tok.type == token.NAME + + def pop_name(self) -> str: + tok = self.pop() + if tok.type != token.NAME: + raise ParseError('pop_name') + return tok.string + + def peek_string(self, string: str) -> bool: + if not (tok := self.peek()): + return False + return tok.string == string + + def pop_string(self, string: str) -> str: + tok = self.pop() + if tok.string != string: + raise ParseError('pop_string') + return tok.string + + def pop(self) -> TokenInfo: + if not self: + raise ParseError('pop') + tok = self.tokens[self.index] + self.index += 1 + return tok diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py index e018c4fc183..842599bd187 100644 --- a/Lib/_pyrepl/base_eventqueue.py +++ b/Lib/_pyrepl/base_eventqueue.py @@ -69,18 +69,14 @@ def insert(self, event: Event) -> None: trace('added event {event}', event=event) self.events.append(event) - def push(self, char: int | bytes | str) -> None: + def push(self, char: int | bytes) -> None: """ Processes a character by updating the buffer and handling special key mappings. 
""" + assert isinstance(char, (int, bytes)) ord_char = char if isinstance(char, int) else ord(char) - if ord_char > 255: - assert isinstance(char, str) - char = bytes(char.encode(self.encoding, "replace")) - self.buf.extend(char) - else: - char = bytes(bytearray((ord_char,))) - self.buf.append(ord_char) + char = ord_char.to_bytes() + self.buf.append(ord_char) if char in self.keymap: if self.keymap is self.compiled_keymap: diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index cbb6d85f683..2354fbb2ec2 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -21,6 +21,7 @@ from __future__ import annotations import os +import time # Categories of actions: # killing @@ -31,6 +32,7 @@ # finishing # [completion] +from .trace import trace # types if False: @@ -437,7 +439,7 @@ def do(self) -> None: import _sitebuiltins with self.reader.suspend(): - self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment, call-arg] + self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment] class invalid_key(Command): @@ -471,19 +473,24 @@ def do(self) -> None: class paste_mode(Command): - def do(self) -> None: self.reader.paste_mode = not self.reader.paste_mode self.reader.dirty = True -class enable_bracketed_paste(Command): +class perform_bracketed_paste(Command): def do(self) -> None: - self.reader.paste_mode = True - self.reader.in_bracketed_paste = True - -class disable_bracketed_paste(Command): - def do(self) -> None: - self.reader.paste_mode = False - self.reader.in_bracketed_paste = False - self.reader.dirty = True + done = "\x1b[201~" + data = "" + start = time.time() + while done not in data: + self.reader.console.wait(100) + ev = self.reader.console.getpending() + data += ev.data + trace( + "bracketed pasting of {l} chars done in {s:.2f}s", + l=len(data), + s=time.time() - start, + ) + self.reader.insert(data.replace(done, "")) + self.reader.last_refresh_cache.invalidated = True diff --git a/Lib/_pyrepl/completing_reader.py 
b/Lib/_pyrepl/completing_reader.py index 1cd4b6367ca..9d2d43be514 100644 --- a/Lib/_pyrepl/completing_reader.py +++ b/Lib/_pyrepl/completing_reader.py @@ -293,3 +293,7 @@ def get_stem(self) -> str: def get_completions(self, stem: str) -> list[str]: return [] + + def get_line(self) -> str: + """Return the current line until the cursor position.""" + return ''.join(self.buffer[:self.pos]) diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py index a6f824dcc4a..447eb1e551e 100644 --- a/Lib/_pyrepl/main.py +++ b/Lib/_pyrepl/main.py @@ -1,6 +1,7 @@ import errno import os import sys +import types CAN_USE_PYREPL: bool @@ -29,12 +30,10 @@ def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): print(FAIL_REASON, file=sys.stderr) return sys._baserepl() - if mainmodule: - namespace = mainmodule.__dict__ - else: - import __main__ - namespace = __main__.__dict__ - namespace.pop("__pyrepl_interactive_console", None) + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ # sys._baserepl() above does this internally, we do it here startup_path = os.getenv("PYTHONSTARTUP") diff --git a/Lib/_pyrepl/mypy.ini b/Lib/_pyrepl/mypy.ini index eabd0e9b440..9375a55b53c 100644 --- a/Lib/_pyrepl/mypy.ini +++ b/Lib/_pyrepl/mypy.ini @@ -23,7 +23,3 @@ check_untyped_defs = False # Various internal modules that typeshed deliberately doesn't have stubs for: [mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] ignore_missing_imports = True - -# Other untyped parts of the stdlib -[mypy-idlelib.*] -ignore_missing_imports = True diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 7fc2422dac9..0ebd9162eca 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -22,14 +22,13 @@ from __future__ import annotations import sys +import _colorize from contextlib import contextmanager from dataclasses import dataclass, field, fields -from _colorize import can_colorize, ANSIColors - from . 
import commands, console, input -from .utils import wlen, unbracket, disp_str +from .utils import wlen, unbracket, disp_str, gen_colors, THEME from .trace import trace @@ -38,8 +37,7 @@ from .types import Callback, SimpleContextManager, KeySpec, CommandName -# syntax classes: - +# syntax classes SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) @@ -105,8 +103,7 @@ def make_default_commands() -> dict[CommandName, type[Command]]: (r"\M-9", "digit-arg"), (r"\M-\n", "accept"), ("\\\\", "self-insert"), - (r"\x1b[200~", "enable_bracketed_paste"), - (r"\x1b[201~", "disable_bracketed_paste"), + (r"\x1b[200~", "perform-bracketed-paste"), (r"\x03", "ctrl-c"), ] + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] @@ -144,16 +141,17 @@ class Reader: Instance variables of note include: * buffer: - A *list* (*not* a string at the moment :-) containing all the - characters that have been entered. + A per-character list containing all the characters that have been + entered. Does not include color information. * console: Hopefully encapsulates the OS dependent stuff. * pos: A 0-based index into 'buffer' for where the insertion point is. * screeninfo: - Ahem. This list contains some info needed to move the - insertion point around reasonably efficiently. + A list of screen position tuples. Each list element is a tuple + representing information on visible line length for a given line. + Allows for efficient skipping of color escape sequences. * cxy, lxy: the position of the insertion point in screen ... 
* syntax_table: @@ -203,7 +201,6 @@ class Reader: dirty: bool = False finished: bool = False paste_mode: bool = False - in_bracketed_paste: bool = False commands: dict[str, type[Command]] = field(default_factory=make_default_commands) last_command: type[Command] | None = None syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) @@ -221,7 +218,6 @@ class Reader: ## cached metadata to speed up screen refreshes @dataclass class RefreshCache: - in_bracketed_paste: bool = False screen: list[str] = field(default_factory=list) screeninfo: list[tuple[int, list[int]]] = field(init=False) line_end_offsets: list[int] = field(default_factory=list) @@ -235,7 +231,6 @@ def update_cache(self, screen: list[str], screeninfo: list[tuple[int, list[int]]], ) -> None: - self.in_bracketed_paste = reader.in_bracketed_paste self.screen = screen.copy() self.screeninfo = screeninfo.copy() self.pos = reader.pos @@ -248,8 +243,7 @@ def valid(self, reader: Reader) -> bool: return False dimensions = reader.console.width, reader.console.height dimensions_changed = dimensions != self.dimensions - paste_changed = reader.in_bracketed_paste != self.in_bracketed_paste - return not (dimensions_changed or paste_changed) + return not dimensions_changed def get_cached_location(self, reader: Reader) -> tuple[int, int]: if self.invalidated: @@ -279,7 +273,7 @@ def __post_init__(self) -> None: self.screeninfo = [(0, [])] self.cxy = self.pos2xy() self.lxy = (self.pos, 0) - self.can_colorize = can_colorize() + self.can_colorize = _colorize.can_colorize() self.last_refresh_cache.screeninfo = self.screeninfo self.last_refresh_cache.pos = self.pos @@ -316,6 +310,12 @@ def calc_screen(self) -> list[str]: pos -= offset prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + if self.can_colorize: + colors = list(gen_colors(self.get_unicode())) + else: + colors = None + trace("colors = {colors}", colors=colors) lines = "".join(self.buffer[offset:]).split("\n") cursor_found = 
False lines_beyond_cursor = 0 @@ -343,9 +343,8 @@ def calc_screen(self) -> list[str]: screeninfo.append((0, [])) pos -= line_len + 1 prompt, prompt_len = self.process_prompt(prompt) - chars, char_widths = disp_str(line) + chars, char_widths = disp_str(line, colors, offset) wrapcount = (sum(char_widths) + prompt_len) // self.console.width - trace("wrapcount = {wrapcount}", wrapcount=wrapcount) if wrapcount == 0 or not char_widths: offset += line_len + 1 # Takes all of the line plus the newline last_refresh_line_end_offsets.append(offset) @@ -479,7 +478,7 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: 'lineno'.""" if self.arg is not None and cursor_on_line: prompt = f"(arg: {self.arg}) " - elif self.paste_mode and not self.in_bracketed_paste: + elif self.paste_mode: prompt = "(paste) " elif "\n" in self.buffer: if lineno == 0: @@ -492,7 +491,8 @@ def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: prompt = self.ps1 if self.can_colorize: - prompt = f"{ANSIColors.BOLD_MAGENTA}{prompt}{ANSIColors.RESET}" + t = THEME() + prompt = f"{t.prompt}{prompt}{t.reset}" return prompt def push_input_trans(self, itrans: input.KeymapTranslator) -> None: @@ -567,6 +567,7 @@ def insert(self, text: str | list[str]) -> None: def update_cursor(self) -> None: """Move the cursor to reflect changes in self.pos""" self.cxy = self.pos2xy() + trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy) self.console.move_cursor(*self.cxy) def after_command(self, cmd: Command) -> None: @@ -633,9 +634,6 @@ def update_screen(self) -> None: def refresh(self) -> None: """Recalculate and refresh the screen.""" - if self.in_bracketed_paste and self.buffer and not self.buffer[-1] == "\n": - return - # this call sets up self.cxy, so call it first. 
self.screen = self.calc_screen() self.console.refresh(self.screen, self.cxy) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py index be229488e54..572eee520e5 100644 --- a/Lib/_pyrepl/readline.py +++ b/Lib/_pyrepl/readline.py @@ -39,6 +39,7 @@ from . import commands, historical_reader from .completing_reader import CompletingReader from .console import Console as ConsoleType +from ._module_completer import ModuleCompleter, make_default_module_completer Console: type[ConsoleType] _error: tuple[type[Exception], ...] | type[Exception] @@ -89,6 +90,7 @@ # "set_pre_input_hook", "set_startup_hook", "write_history_file", + "append_history_file", # ---- multiline extensions ---- "multiline_input", ] @@ -99,7 +101,7 @@ class ReadlineConfig: readline_completer: Completer | None = None completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") - + module_completer: ModuleCompleter = field(default_factory=make_default_module_completer) @dataclass(kw_only=True) class ReadlineAlikeReader(historical_reader.HistoricalReader, CompletingReader): @@ -132,6 +134,8 @@ def get_stem(self) -> str: return "".join(b[p + 1 : self.pos]) def get_completions(self, stem: str) -> list[str]: + if module_completions := self.get_module_completions(): + return module_completions if len(stem) == 0 and self.more_lines is not None: b = self.buffer p = self.pos @@ -161,6 +165,10 @@ def get_completions(self, stem: str) -> list[str]: result.sort() return result + def get_module_completions(self) -> list[str]: + line = self.get_line() + return self.config.module_completer.get_completions(line) + def get_trimmed_history(self, maxlength: int) -> list[str]: if maxlength >= 0: cut = len(self.history) - maxlength @@ -268,10 +276,6 @@ def do(self) -> None: r = self.reader # type: ignore[assignment] r.dirty = True # this is needed to hide the completion menu, if visible - if self.reader.in_bracketed_paste: - r.insert("\n") - return - # if there are already several lines and 
the cursor # is not on the last one, always insert a new \n. text = r.get_unicode() @@ -446,6 +450,7 @@ def read_history_file(self, filename: str = gethistoryfile()) -> None: del buffer[:] if line: history.append(line) + self.set_history_length(self.get_current_history_length()) def write_history_file(self, filename: str = gethistoryfile()) -> None: maxlength = self.saved_history_length @@ -457,6 +462,19 @@ def write_history_file(self, filename: str = gethistoryfile()) -> None: entry = entry.replace("\n", "\r\n") # multiline history support f.write(entry + "\n") + def append_history_file(self, filename: str = gethistoryfile()) -> None: + reader = self.get_reader() + saved_length = self.get_history_length() + length = self.get_current_history_length() - saved_length + history = reader.get_trimmed_history(length) + f = open(os.path.expanduser(filename), "a", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + self.set_history_length(saved_length + length) + def clear_history(self) -> None: del self.get_reader().history[:] @@ -526,6 +544,7 @@ def insert_text(self, text: str) -> None: get_current_history_length = _wrapper.get_current_history_length read_history_file = _wrapper.read_history_file write_history_file = _wrapper.write_history_file +append_history_file = _wrapper.append_history_file clear_history = _wrapper.clear_history get_history_item = _wrapper.get_history_item remove_history_item = _wrapper.remove_history_item @@ -587,6 +606,7 @@ def _setup(namespace: Mapping[str, Any]) -> None: # set up namespace in rlcompleter, which requires it to be a bona fide dict if not isinstance(namespace, dict): namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) _wrapper.config.readline_completer = RLCompleter(namespace).complete # this is not really what readline.c does. 
Better than nothing I guess diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py index a08546a9319..965b853c34b 100644 --- a/Lib/_pyrepl/simple_interact.py +++ b/Lib/_pyrepl/simple_interact.py @@ -30,8 +30,10 @@ import os import sys import code +import warnings +import errno -from .readline import _get_reader, multiline_input +from .readline import _get_reader, multiline_input, append_history_file _error: tuple[type[Exception], ...] | type[Exception] @@ -109,6 +111,10 @@ def run_multiline_interactive_console( more_lines = functools.partial(_more_lines, console) input_n = 0 + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + def maybe_run_command(statement: str) -> bool: statement = statement.strip() if statement in console.locals or statement not in REPL_COMMANDS: @@ -144,6 +150,11 @@ def maybe_run_command(statement: str) -> bool: input_name = f"" more = console.push(_strip_final_indent(statement), filename=input_name, _symbol="single") # type: ignore[call-arg] assert not more + try: + append_history_file() + except (FileNotFoundError, PermissionError, OSError) as e: + warnings.warn(f"failed to open the history file for writing: {e}") + input_n += 1 except KeyboardInterrupt: r = _get_reader() @@ -152,9 +163,18 @@ def maybe_run_command(statement: str) -> bool: r.pos = len(r.get_unicode()) r.dirty = True r.refresh() - r.in_bracketed_paste = False console.write("\nKeyboardInterrupt\n") console.resetbuffer() except MemoryError: console.write("\nMemoryError\n") console.resetbuffer() + except SystemExit: + raise + except: + console.showtraceback() + console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index 96379bc20f3..d21cdd9b076 100644 --- 
a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -29,6 +29,7 @@ import struct import termios import time +import types import platform from fcntl import ioctl @@ -39,6 +40,12 @@ from .unix_eventqueue import EventQueue from .utils import wlen +# declare posix optional to allow None assignment on other platforms +posix: types.ModuleType | None +try: + import posix +except ImportError: + posix = None TYPE_CHECKING = False @@ -150,8 +157,6 @@ def __init__( self.pollob = poll() self.pollob.register(self.input_fd, select.POLLIN) - self.input_buffer = b"" - self.input_buffer_pos = 0 curses.setupterm(term or None, self.output_fd) self.term = term @@ -199,22 +204,14 @@ def _my_getstr(cap: str, optional: bool = False) -> bytes | None: self.event_queue = EventQueue(self.input_fd, self.encoding) self.cursor_visible = 1 - def more_in_buffer(self) -> bool: - return bool( - self.input_buffer - and self.input_buffer_pos < len(self.input_buffer) - ) + signal.signal(signal.SIGCONT, self._sigcont_handler) + + def _sigcont_handler(self, signum, frame): + self.restore() + self.prepare() def __read(self, n: int) -> bytes: - if not self.more_in_buffer(): - self.input_buffer = os.read(self.input_fd, 10000) - - ret = self.input_buffer[self.input_buffer_pos : self.input_buffer_pos + n] - self.input_buffer_pos += len(ret) - if self.input_buffer_pos >= len(self.input_buffer): - self.input_buffer = b"" - self.input_buffer_pos = 0 - return ret + return os.read(self.input_fd, n) def change_encoding(self, encoding: str) -> None: @@ -422,7 +419,6 @@ def wait(self, timeout: float | None = None) -> bool: """ return ( not self.event_queue.empty() - or self.more_in_buffer() or bool(self.pollob.poll(timeout)) ) @@ -525,6 +521,7 @@ def getpending(self): e.raw += e.raw amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] + trace("getpending({a})", a=amount) raw = self.__read(amount) data = str(raw, self.encoding, "replace") e.data += data @@ -566,11 +563,9 @@ def 
clear(self): @property def input_hook(self): - try: - import posix - except ImportError: - return None - if posix._is_inputhook_installed(): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if posix is not None and posix._is_inputhook_installed(): return posix._inputhook def __enable_bracketed_paste(self) -> None: diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py index 7437fbe1ab9..e04fbdc6c8a 100644 --- a/Lib/_pyrepl/utils.py +++ b/Lib/_pyrepl/utils.py @@ -1,6 +1,17 @@ -import re -import unicodedata +from __future__ import annotations +import builtins import functools +import keyword +import re +import token as T +import tokenize +import unicodedata +import _colorize + +from collections import deque +from io import StringIO +from tokenize import TokenInfo as TI +from typing import Iterable, Iterator, Match, NamedTuple, Self from .types import CharBuffer, CharWidths from .trace import trace @@ -8,6 +19,43 @@ ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +IDENTIFIERS_AFTER = {"def", "class"} +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} + + +def THEME(**kwargs): + # Not cached: the user can modify the theme inside the interactive session. 
+ return _colorize.get_theme(**kwargs).syntax + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + @classmethod + def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + + return cls( + line_len[token.start[0] - 1] + token.start[1], + line_len[token.end[0] - 1] + token.end[1] + end_offset, + ) + + +class ColorSpan(NamedTuple): + span: Span + tag: str @functools.cache @@ -41,17 +89,212 @@ def unbracket(s: str, including_content: bool = False) -> str: return s.translate(ZERO_WIDTH_TRANS) -def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: - r"""Decompose the input buffer into a printable variant. +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. 
+ """ + sio = StringIO(buffer) + line_lengths = [0] + [len(line) for line in sio.readlines()] + # make line_lengths cumulative + for i in range(1, len(line_lengths)): + line_lengths[i] += line_lengths[i-1] + + sio.seek(0) + gen = tokenize.generate_tokens(sio.readline) + last_emitted: ColorSpan | None = None + try: + for color in gen_colors_from_token_stream(gen, line_lengths): + yield color + last_emitted = color + except SyntaxError: + return + except tokenize.TokenError as te: + yield from recover_unterminated_string( + te, line_lengths, last_emitted, buffer + ) + + +def recover_unterminated_string( + exc: tokenize.TokenError, + line_lengths: list[int], + last_emitted: ColorSpan | None, + buffer: str, +) -> Iterator[ColorSpan]: + msg, loc = exc.args + if loc is None: + return + + line_no, column = loc + + if msg.startswith( + ( + "unterminated string literal", + "unterminated f-string literal", + "unterminated t-string literal", + "EOF in multi-line string", + "unterminated triple-quoted f-string literal", + "unterminated triple-quoted t-string literal", + ) + ): + start = line_lengths[line_no - 1] + column - 1 + end = line_lengths[-1] - 1 + + # in case FSTRING_START was already emitted + if last_emitted and start <= last_emitted.span.start: + trace("before last emitted = {s}", s=start) + start = last_emitted.span.end + 1 + + span = Span(start, end) + trace("yielding span {a} -> {b}", a=span.start, b=span.end) + yield ColorSpan(span, "string") + else: + trace( + "unhandled token error({buffer}) = {te}", + buffer=repr(buffer), + te=str(exc), + ) + + +def gen_colors_from_token_stream( + token_generator: Iterator[TI], + line_lengths: list[int], +) -> Iterator[ColorSpan]: + token_window = prev_next_window(token_generator) + + is_def_name = False + bracket_level = 0 + for prev_token, token, next_token in token_window: + assert token is not None + if token.start == token.end: + continue + + match token.type: + case ( + T.STRING + | T.FSTRING_START | T.FSTRING_MIDDLE | 
T.FSTRING_END + | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "string") + case T.COMMENT: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "comment") + case T.NUMBER: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "number") + case T.OP: + if token.string in "([{": + bracket_level += 1 + elif token.string in ")]}": + bracket_level -= 1 + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "op") + case T.NAME: + if is_def_name: + is_def_name = False + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "definition") + elif keyword.iskeyword(token.string): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "keyword") + if token.string in IDENTIFIERS_AFTER: + is_def_name = True + elif ( + keyword.issoftkeyword(token.string) + and bracket_level == 0 + and is_soft_keyword_used(prev_token, token, next_token) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "soft_keyword") + elif token.string in BUILTINS: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "builtin") + + +keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} +keyword_first_sets_case = {"False", "None", "True"} + + +def is_soft_keyword_used(*tokens: TI | None) -> bool: + """Returns True if the current token is a keyword in this context. + + For the `*tokens` to match anything, they have to be a three-tuple of + (previous, current, next). 
+ """ + trace("is_soft_keyword_used{t}", t=tokens) + match tokens: + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_match + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="case"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "[" | "{") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="case"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_case + return True + case (TI(string="case"), TI(string="_"), TI(string=":")): + return True + case _: + return False + + +def disp_str( + buffer: str, + colors: list[ColorSpan] | None = None, + start_index: int = 0, + force_color: bool = False, +) -> tuple[CharBuffer, CharWidths]: + r"""Decompose the input buffer into a printable variant with applied colors. Returns a tuple of two lists: - - the first list is the input buffer, character by character; + - the first list is the input buffer, character by character, with color + escape codes added (while those codes contain multiple ASCII characters, + each code is considered atomic *and is attached for the corresponding + visible character*); - the second list is the visible width of each character in the input buffer. + Note on colors: + - The `colors` list, if provided, is partially consumed within. We're using + a list and not a generator since we need to hold onto the current + unfinished span between calls to disp_str in case of multiline strings. + - The `colors` list is computed from the start of the input block. 
`buffer` + is only a subset of that input block, a single line within. This is why + we need `start_index` to inform us which position is the start of `buffer` + actually within user input. This allows us to match color spans correctly. + Examples: >>> utils.disp_str("a = 9") (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) + + >>> line = "while 1:" + >>> colors = list(utils.gen_colors(line)) + >>> utils.disp_str(line, colors=colors) + (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) + """ chars: CharBuffer = [] char_widths: CharWidths = [] @@ -59,7 +302,21 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: if not buffer: return chars, char_widths - for c in buffer: + while colors and colors[0].span.end < start_index: + # move past irrelevant spans + colors.pop(0) + + theme = THEME(force_color=force_color) + pre_color = "" + post_color = "" + if colors and colors[0].span.start < start_index: + # looks like we're continuing a previous color (e.g. a multiline str) + pre_color = theme[colors[0].tag] + + for i, c in enumerate(buffer, start_index): + if colors and colors[0].span.start == i: # new color starts now + pre_color = theme[colors[0].tag] + if c == "\x1a": # CTRL-Z on Windows chars.append(c) char_widths.append(2) @@ -73,5 +330,43 @@ def disp_str(buffer: str) -> tuple[CharBuffer, CharWidths]: else: chars.append(c) char_widths.append(str_width(c)) - trace("disp_str({buffer}) = {s}, {b}", buffer=repr(buffer), s=chars, b=char_widths) + + if colors and colors[0].span.end == i: # current color ends now + post_color = theme.reset + colors.pop(0) + + chars[-1] = pre_color + chars[-1] + post_color + pre_color = "" + post_color = "" + + if colors and colors[0].span.start < i and colors[0].span.end > i: + # even though the current color should be continued, reset it for now. + # the next call to `disp_str()` will revive it. 
+ chars[-1] += theme.reset + return chars, char_widths + + +def prev_next_window[T]( + iterable: Iterable[T] +) -> Iterator[tuple[T | None, ...]]: + """Generates three-tuples of (previous, current, next) items. + + On the first iteration previous is None. On the last iteration next + is None. In case of exception next is None and the exception is re-raised + on a subsequent next() call. + + Inspired by `sliding_window` from `itertools` recipes. + """ + + iterator = iter(iterable) + window = deque((None, next(iterator)), maxlen=3) + try: + for x in iterator: + window.append(x) + yield tuple(window) + except Exception: + raise + finally: + window.append(None) + yield tuple(window) diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 47fd3fd8f89..95749198b3b 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -22,10 +22,9 @@ import io import os import sys -import time -import msvcrt import ctypes +import types from ctypes.wintypes import ( _COORD, WORD, @@ -44,7 +43,7 @@ from .windows_eventqueue import EventQueue try: - from ctypes import GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] + from ctypes import get_last_error, GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] except: # Keep MyPy happy off Windows from ctypes import CDLL as WinDLL, cdll as windll @@ -52,11 +51,20 @@ def GetLastError() -> int: return 42 + def get_last_error() -> int: + return 42 + class WinError(OSError): # type: ignore[no-redef] def __init__(self, err: int | None, descr: str | None = None) -> None: self.err = err self.descr = descr +# declare nt optional to allow None assignment on other platforms +nt: types.ModuleType | None +try: + import nt +except ImportError: + nt = None TYPE_CHECKING = False @@ -108,15 +116,20 @@ def __init__(self, err: int | None, descr: str | None = None) -> None: ALT_ACTIVE = 0x01 | 0x02 CTRL_ACTIVE = 0x04 | 0x08 +WAIT_TIMEOUT = 0x102 +WAIT_FAILED = 0xFFFFFFFF + +# 
from winbase.h +INFINITE = 0xFFFFFFFF + class _error(Exception): pass def _supports_vt(): try: - import nt return nt._supports_virtual_terminal() - except (ImportError, AttributeError): + except AttributeError: return False class WindowsConsole(Console): @@ -228,11 +241,9 @@ def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: @property def input_hook(self): - try: - import nt - except ImportError: - return None - if nt._is_inputhook_installed(): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if nt is not None and nt._is_inputhook_installed(): return nt._inputhook def __write_changed_line( @@ -409,12 +420,8 @@ def _getscrollbacksize(self) -> int: return info.srWindow.Bottom # type: ignore[no-any-return] def _read_input(self, block: bool = True) -> INPUT_RECORD | None: - if not block: - events = DWORD() - if not GetNumberOfConsoleInputEvents(InHandle, events): - raise WinError(GetLastError()) - if not events.value: - return None + if not block and not self.wait(timeout=0): + return None rec = INPUT_RECORD() read = DWORD() @@ -423,6 +430,20 @@ def _read_input(self, block: bool = True) -> INPUT_RECORD | None: return rec + def _read_input_bulk( + self, block: bool, n: int + ) -> tuple[ctypes.Array[INPUT_RECORD], int]: + rec = (n * INPUT_RECORD)() + read = DWORD() + + if not block and not self.wait(timeout=0): + return rec, 0 + + if not ReadConsoleInput(InHandle, rec, n, read): + raise WinError(GetLastError()) + + return rec, read.value + def get_event(self, block: bool = True) -> Event | None: """Return an Event instance. 
Returns None if |block| is false and there is no event pending, otherwise waits for the @@ -447,7 +468,7 @@ def get_event(self, block: bool = True) -> Event | None: if key == "\r": # Make enter unix-like - return Event(evt="key", data="\n", raw=b"\n") + return Event(evt="key", data="\n") elif key_event.wVirtualKeyCode == 8: # Turn backspace directly into the command key = "backspace" @@ -459,24 +480,29 @@ def get_event(self, block: bool = True) -> Event | None: key = f"ctrl {key}" elif key_event.dwControlKeyState & ALT_ACTIVE: # queue the key, return the meta command - self.event_queue.insert(Event(evt="key", data=key, raw=key)) + self.event_queue.insert(Event(evt="key", data=key)) return Event(evt="key", data="\033") # keymap.py uses this for meta - return Event(evt="key", data=key, raw=key) + return Event(evt="key", data=key) if block: continue return None elif self.__vt_support: # If virtual terminal is enabled, scanning VT sequences - self.event_queue.push(rec.Event.KeyEvent.uChar.UnicodeChar) + for char in raw_key.encode(self.event_queue.encoding, "replace"): + self.event_queue.push(char) continue if key_event.dwControlKeyState & ALT_ACTIVE: - # queue the key, return the meta command - self.event_queue.insert(Event(evt="key", data=key, raw=raw_key)) - return Event(evt="key", data="\033") # keymap.py uses this for meta + # Do not swallow characters that have been entered via AltGr: + # Windows internally converts AltGr to CTRL+ALT, see + # https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-vkkeyscanw + if not key_event.dwControlKeyState & CTRL_ACTIVE: + # queue the key, return the meta command + self.event_queue.insert(Event(evt="key", data=key)) + return Event(evt="key", data="\033") # keymap.py uses this for meta - return Event(evt="key", data=key, raw=raw_key) + return Event(evt="key", data=key) return self.event_queue.get() def push_char(self, char: int | bytes) -> None: @@ -518,18 +544,36 @@ def forgetinput(self) -> None: def 
getpending(self) -> Event: """Return the characters that have been typed but not yet processed.""" - return Event("key", "", b"") + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + if e2: + e.data += e2.data + + recs, rec_count = self._read_input_bulk(False, 1024) + for i in range(rec_count): + rec = recs[i] + if rec and rec.EventType == KEY_EVENT: + key_event = rec.Event.KeyEvent + ch = key_event.uChar.UnicodeChar + if ch == "\r": + ch += "\n" + e.data += ch + return e def wait(self, timeout: float | None) -> bool: """Wait for an event.""" - # Poor man's Windows select loop - start_time = time.time() - while True: - if msvcrt.kbhit(): # type: ignore[attr-defined] - return True - if timeout and time.time() - start_time > timeout / 1000: - return False - time.sleep(0.01) + if timeout is None: + timeout = INFINITE + else: + timeout = int(timeout) + ret = WaitForSingleObject(InHandle, timeout) + if ret == WAIT_FAILED: + raise WinError(get_last_error()) + elif ret == WAIT_TIMEOUT: + return False + return True def repaint(self) -> None: raise NotImplementedError("No repaint support") @@ -649,14 +693,15 @@ class INPUT_RECORD(Structure): ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] ReadConsoleInput.restype = BOOL - GetNumberOfConsoleInputEvents = _KERNEL32.GetNumberOfConsoleInputEvents - GetNumberOfConsoleInputEvents.argtypes = [HANDLE, POINTER(DWORD)] - GetNumberOfConsoleInputEvents.restype = BOOL FlushConsoleInputBuffer = _KERNEL32.FlushConsoleInputBuffer FlushConsoleInputBuffer.argtypes = [HANDLE] FlushConsoleInputBuffer.restype = BOOL + WaitForSingleObject = _KERNEL32.WaitForSingleObject + WaitForSingleObject.argtypes = [HANDLE, DWORD] + WaitForSingleObject.restype = DWORD + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) InHandle = GetStdHandle(STD_INPUT_HANDLE) else: @@ -670,7 +715,7 @@ def _win_only(*args, **kwargs): GetConsoleMode = _win_only SetConsoleMode = _win_only 
ReadConsoleInput = _win_only - GetNumberOfConsoleInputEvents = _win_only FlushConsoleInputBuffer = _win_only + WaitForSingleObject = _win_only OutHandle = 0 InHandle = 0 diff --git a/Lib/_strptime.py b/Lib/_strptime.py index aa63933a49d..ae67949626d 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -302,7 +302,7 @@ def __init__(self, locale_time=None): # W is set below by using 'U' 'y': r"(?P\d\d)", 'Y': r"(?P\d\d\d\d)", - 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", + 'z': r"(?P([+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?)|(?-i:Z))?", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), @@ -548,27 +548,28 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): iso_week = int(found_dict['V']) elif group_key == 'z': z = found_dict['z'] - if z == 'Z': - gmtoff = 0 - else: - if z[3] == ':': - z = z[:3] + z[4:] - if len(z) > 5: - if z[5] != ':': - msg = f"Inconsistent use of : in {found_dict['z']}" - raise ValueError(msg) - z = z[:5] + z[6:] - hours = int(z[1:3]) - minutes = int(z[3:5]) - seconds = int(z[5:7] or 0) - gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds - gmtoff_remainder = z[8:] - # Pad to always return microseconds. - gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder)) - gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding) - if z.startswith("-"): - gmtoff = -gmtoff - gmtoff_fraction = -gmtoff_fraction + if z: + if z == 'Z': + gmtoff = 0 + else: + if z[3] == ':': + z = z[:3] + z[4:] + if len(z) > 5: + if z[5] != ':': + msg = f"Inconsistent use of : in {found_dict['z']}" + raise ValueError(msg) + z = z[:5] + z[6:] + hours = int(z[1:3]) + minutes = int(z[3:5]) + seconds = int(z[5:7] or 0) + gmtoff = (hours * 60 * 60) + (minutes * 60) + seconds + gmtoff_remainder = z[8:] + # Pad to always return microseconds. 
+ gmtoff_remainder_padding = "0" * (6 - len(gmtoff_remainder)) + gmtoff_fraction = int(gmtoff_remainder + gmtoff_remainder_padding) + if z.startswith("-"): + gmtoff = -gmtoff + gmtoff_fraction = -gmtoff_fraction elif group_key == 'Z': # Since -1 is default value only need to worry about setting tz if # it can be something other than -1. diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index b006d76c4e2..0b9e5d3bbf6 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -4,128 +4,6 @@ class. Depending on the version of Python you're using, there may be a faster one available. You should always import the `local` class from `threading`.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = sorted(mydata.__dict__.items()) - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. 
- -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... def __init__(self, /, **kw): - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red')], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... 
__slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> mydata.number - 11 - ->>> del mydata """ from weakref import ref diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index d6243c88636..32b85534589 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -12,10 +12,10 @@ "ForwardRef", "call_annotate_function", "call_evaluate_function", - "get_annotate_function", + "get_annotate_from_class_namespace", "get_annotations", "annotations_to_string", - "value_to_string", + "type_repr", ] @@ -38,6 +38,7 @@ class Format(enum.IntEnum): "__weakref__", "__arg__", "__globals__", + "__extra_names__", "__code__", "__ast_node__", "__cell__", @@ -77,20 +78,42 @@ def __init__( self.__forward_is_argument__ = is_argument self.__forward_is_class__ = is_class self.__forward_module__ = module + self.__owner__ = owner + # These are always set to None here but may be non-None if a ForwardRef + # is created through __class__ assignment on a _Stringifier object. self.__globals__ = None + self.__cell__ = None + self.__extra_names__ = None + # These are initially None but serve as a cache and may be set to a non-None + # value later. self.__code__ = None self.__ast_node__ = None - self.__cell__ = None - self.__owner__ = owner def __init_subclass__(cls, /, *args, **kwds): raise TypeError("Cannot subclass ForwardRef") - def evaluate(self, *, globals=None, locals=None, type_params=None, owner=None): + def evaluate( + self, + *, + globals=None, + locals=None, + type_params=None, + owner=None, + format=Format.VALUE, + ): """Evaluate the forward reference and return the value. If the forward reference cannot be evaluated, raise an exception. 
""" + match format: + case Format.STRING: + return self.__forward_arg__ + case Format.VALUE: + is_forwardref_format = False + case Format.FORWARDREF: + is_forwardref_format = True + case _: + raise NotImplementedError(format) if self.__cell__ is not None: try: return self.__cell__.cell_contents @@ -147,21 +170,42 @@ def evaluate(self, *, globals=None, locals=None, type_params=None, owner=None): if not self.__forward_is_class__ or param_name not in globals: globals[param_name] = param locals.pop(param_name, None) + if self.__extra_names__: + locals = {**locals, **self.__extra_names__} arg = self.__forward_arg__ if arg.isidentifier() and not keyword.iskeyword(arg): if arg in locals: - value = locals[arg] + return locals[arg] elif arg in globals: - value = globals[arg] + return globals[arg] elif hasattr(builtins, arg): return getattr(builtins, arg) + elif is_forwardref_format: + return self else: raise NameError(arg) else: code = self.__forward_code__ - value = eval(code, globals=globals, locals=locals) - return value + try: + return eval(code, globals=globals, locals=locals) + except Exception: + if not is_forwardref_format: + raise + new_locals = _StringifierDict( + {**builtins.__dict__, **locals}, + globals=globals, + owner=owner, + is_class=self.__forward_is_class__, + format=format, + ) + try: + result = eval(code, globals=globals, locals=new_locals) + except Exception: + return self + else: + new_locals.transmogrify() + return result def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): import typing @@ -225,10 +269,12 @@ def __eq__(self, other): # because dictionaries are not hashable. 
and self.__globals__ is other.__globals__ and self.__forward_is_class__ == other.__forward_is_class__ - and self.__code__ == other.__code__ - and self.__ast_node__ == other.__ast_node__ and self.__cell__ == other.__cell__ and self.__owner__ == other.__owner__ + and ( + (tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None) == + (tuple(sorted(other.__extra_names__.items())) if other.__extra_names__ else None) + ) ) def __hash__(self): @@ -237,10 +283,9 @@ def __hash__(self): self.__forward_module__, id(self.__globals__), # dictionaries are not hashable, so hash by identity self.__forward_is_class__, - self.__code__, - self.__ast_node__, self.__cell__, self.__owner__, + tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None, )) def __or__(self, other): @@ -260,6 +305,9 @@ def __repr__(self): return f"ForwardRef({self.__forward_arg__!r}{''.join(extra)})" +_Template = type(t"") + + class _Stringifier: # Must match the slots on ForwardRef, so we can turn an instance of one into an # instance of the other in place. @@ -274,6 +322,7 @@ def __init__( cell=None, *, stringifier_dict, + extra_names=None, ): # Either an AST node or a simple str (for the common case where a ForwardRef # represent a single name). 
@@ -285,6 +334,7 @@ def __init__( self.__code__ = None self.__ast_node__ = node self.__globals__ = globals + self.__extra_names__ = extra_names self.__cell__ = cell self.__owner__ = owner self.__stringifier_dict__ = stringifier_dict @@ -292,28 +342,65 @@ def __init__( def __convert_to_ast(self, other): if isinstance(other, _Stringifier): if isinstance(other.__ast_node__, str): - return ast.Name(id=other.__ast_node__) - return other.__ast_node__ - elif isinstance(other, slice): - return ast.Slice( - lower=( - self.__convert_to_ast(other.start) - if other.start is not None - else None - ), - upper=( - self.__convert_to_ast(other.stop) - if other.stop is not None - else None - ), - step=( - self.__convert_to_ast(other.step) - if other.step is not None - else None - ), - ) + return ast.Name(id=other.__ast_node__), other.__extra_names__ + return other.__ast_node__, other.__extra_names__ + elif type(other) is _Template: + return _template_to_ast(other), None + elif ( + # In STRING format we don't bother with the create_unique_name() dance; + # it's better to emit the repr() of the object instead of an opaque name. 
+ self.__stringifier_dict__.format == Format.STRING + or other is None + or type(other) in (str, int, float, bool, complex) + ): + return ast.Constant(value=other), None + elif type(other) is dict: + extra_names = {} + keys = [] + values = [] + for key, value in other.items(): + new_key, new_extra_names = self.__convert_to_ast(key) + if new_extra_names is not None: + extra_names.update(new_extra_names) + keys.append(new_key) + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + values.append(new_value) + return ast.Dict(keys, values), extra_names + elif type(other) in (list, tuple, set): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + ast_class = {list: ast.List, tuple: ast.Tuple, set: ast.Set}[type(other)] + return ast_class(elts), extra_names else: - return ast.Constant(value=other) + name = self.__stringifier_dict__.create_unique_name() + return ast.Name(id=name), {name: other} + + def __convert_to_ast_getitem(self, other): + if isinstance(other, slice): + extra_names = {} + + def conv(obj): + if obj is None: + return None + new_obj, new_extra_names = self.__convert_to_ast(obj) + if new_extra_names is not None: + extra_names.update(new_extra_names) + return new_obj + + return ast.Slice( + lower=conv(other.start), + upper=conv(other.stop), + step=conv(other.step), + ), extra_names + else: + return self.__convert_to_ast(other) def __get_ast(self): node = self.__ast_node__ @@ -321,13 +408,19 @@ def __get_ast(self): return ast.Name(id=node) return node - def __make_new(self, node): + def __make_new(self, node, extra_names=None): + new_extra_names = {} + if self.__extra_names__ is not None: + new_extra_names.update(self.__extra_names__) + if extra_names is not None: + new_extra_names.update(extra_names) stringifier = _Stringifier( 
node, self.__globals__, self.__owner__, self.__forward_is_class__, stringifier_dict=self.__stringifier_dict__, + extra_names=new_extra_names or None, ) self.__stringifier_dict__.stringifiers.append(stringifier) return stringifier @@ -343,27 +436,37 @@ def __getitem__(self, other): if self.__ast_node__ == "__classdict__": raise KeyError if isinstance(other, tuple): - elts = [self.__convert_to_ast(elt) for elt in other] + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast_getitem(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) other = ast.Tuple(elts) else: - other = self.__convert_to_ast(other) + other, extra_names = self.__convert_to_ast_getitem(other) assert isinstance(other, ast.AST), repr(other) - return self.__make_new(ast.Subscript(self.__get_ast(), other)) + return self.__make_new(ast.Subscript(self.__get_ast(), other), extra_names) def __getattr__(self, attr): return self.__make_new(ast.Attribute(self.__get_ast(), attr)) def __call__(self, *args, **kwargs): - return self.__make_new( - ast.Call( - self.__get_ast(), - [self.__convert_to_ast(arg) for arg in args], - [ - ast.keyword(key, self.__convert_to_ast(value)) - for key, value in kwargs.items() - ], - ) - ) + extra_names = {} + ast_args = [] + for arg in args: + new_arg, new_extra_names = self.__convert_to_ast(arg) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_args.append(new_arg) + ast_kwargs = [] + for key, value in kwargs.items(): + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_kwargs.append(ast.keyword(key, new_value)) + return self.__make_new(ast.Call(self.__get_ast(), ast_args, ast_kwargs), extra_names) def __iter__(self): yield self.__make_new(ast.Starred(self.__get_ast())) @@ -378,8 +481,9 @@ def __format__(self, format_spec): def _make_binop(op: ast.AST): def binop(self, other): 
+ rhs, extra_names = self.__convert_to_ast(other) return self.__make_new( - ast.BinOp(self.__get_ast(), op, self.__convert_to_ast(other)) + ast.BinOp(self.__get_ast(), op, rhs), extra_names ) return binop @@ -402,8 +506,9 @@ def binop(self, other): def _make_rbinop(op: ast.AST): def rbinop(self, other): + new_other, extra_names = self.__convert_to_ast(other) return self.__make_new( - ast.BinOp(self.__convert_to_ast(other), op, self.__get_ast()) + ast.BinOp(new_other, op, self.__get_ast()), extra_names ) return rbinop @@ -426,12 +531,14 @@ def rbinop(self, other): def _make_compare(op): def compare(self, other): + rhs, extra_names = self.__convert_to_ast(other) return self.__make_new( ast.Compare( left=self.__get_ast(), ops=[op], - comparators=[self.__convert_to_ast(other)], - ) + comparators=[rhs], + ), + extra_names, ) return compare @@ -458,14 +565,42 @@ def unary_op(self): del _make_unary_op +def _template_to_ast(template): + values = [] + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + # Interpolation, but we don't want to import the string module + case _: + interp = ast.Interpolation( + str=part.expression, + value=ast.parse(part.expression), + conversion=( + ord(part.conversion) + if part.conversion is not None + else -1 + ), + format_spec=( + ast.Constant(value=part.format_spec) + if part.format_spec != "" + else None + ), + ) + values.append(interp) + return ast.TemplateStr(values=values) + + class _StringifierDict(dict): - def __init__(self, namespace, globals=None, owner=None, is_class=False): + def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): super().__init__(namespace) self.namespace = namespace self.globals = globals self.owner = owner self.is_class = is_class self.stringifiers = [] + self.next_id = 1 + self.format = format def __missing__(self, key): fwdref = _Stringifier( @@ -478,6 +613,19 @@ def __missing__(self, key): self.stringifiers.append(fwdref) return fwdref + 
def transmogrify(self): + for obj in self.stringifiers: + obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef + if isinstance(obj.__ast_node__, str): + obj.__arg__ = obj.__ast_node__ + obj.__ast_node__ = None + + def create_unique_name(self): + name = f"__annotationlib_name_{self.next_id}__" + self.next_id += 1 + return name + def call_evaluate_function(evaluate, format, *, owner=None): """Call an evaluate function. Evaluate functions are normally generated for @@ -521,20 +669,11 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): # possibly constants if the annotate function uses them directly). We then # convert each of those into a string to get an approximation of the # original source. - globals = _StringifierDict({}) - if annotate.__closure__: - freevars = annotate.__code__.co_freevars - new_closure = [] - for i, cell in enumerate(annotate.__closure__): - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" - fwdref = _Stringifier(name, stringifier_dict=globals) - new_closure.append(types.CellType(fwdref)) - closure = tuple(new_closure) - else: - closure = None + globals = _StringifierDict({}, format=format) + is_class = isinstance(owner, type) + closure = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) func = types.FunctionType( annotate.__code__, globals, @@ -544,9 +683,9 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): ) annos = func(Format.VALUE_WITH_FAKE_GLOBALS) if _is_evaluate: - return annos if isinstance(annos, str) else repr(annos) + return _stringify_single(annos) return { - key: val if isinstance(val, str) else repr(val) + key: _stringify_single(val) for key, val in annos.items() } elif format == Format.FORWARDREF: @@ -569,33 +708,43 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): # that returns a bool and an defined set of attributes. 
namespace = {**annotate.__builtins__, **annotate.__globals__} is_class = isinstance(owner, type) - globals = _StringifierDict(namespace, annotate.__globals__, owner, is_class) - if annotate.__closure__: - freevars = annotate.__code__.co_freevars - new_closure = [] - for i, cell in enumerate(annotate.__closure__): - try: - cell.cell_contents - except ValueError: - if i < len(freevars): - name = freevars[i] - else: - name = "__cell__" - fwdref = _Stringifier( - name, - cell=cell, - owner=owner, - globals=annotate.__globals__, - is_class=is_class, - stringifier_dict=globals, - ) - globals.stringifiers.append(fwdref) - new_closure.append(types.CellType(fwdref)) - else: - new_closure.append(cell) - closure = tuple(new_closure) + globals = _StringifierDict( + namespace, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=True + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + try: + result = func(Format.VALUE_WITH_FAKE_GLOBALS) + except Exception: + pass else: - closure = None + globals.transmogrify() + return result + + # Try again, but do not provide any globals. This allows us to return + # a value in certain cases where an exception gets raised during evaluation. 
+ globals = _StringifierDict( + {}, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) func = types.FunctionType( annotate.__code__, globals, @@ -604,13 +753,21 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): kwdefaults=annotate.__kwdefaults__, ) result = func(Format.VALUE_WITH_FAKE_GLOBALS) - for obj in globals.stringifiers: - obj.__class__ = ForwardRef - obj.__stringifier_dict__ = None # not needed for ForwardRef - if isinstance(obj.__ast_node__, str): - obj.__arg__ = obj.__ast_node__ - obj.__ast_node__ = None - return result + globals.transmogrify() + if _is_evaluate: + if isinstance(result, ForwardRef): + return result.evaluate(format=Format.FORWARDREF) + else: + return result + else: + return { + key: ( + val.evaluate(format=Format.FORWARDREF) + if isinstance(val, ForwardRef) + else val + ) + for key, val in result.items() + } elif format == Format.VALUE: # Should be impossible because __annotate__ functions must not raise # NotImplementedError for this format. @@ -619,29 +776,61 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): raise ValueError(f"Invalid format: {format!r}") -# We use the descriptors from builtins.type instead of accessing -# .__annotations__ and .__annotate__ directly on class objects, because -# otherwise we could get wrong results in some cases involving metaclasses. -# See PEP 749. 
-_BASE_GET_ANNOTATE = type.__dict__["__annotate__"].__get__ -_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ +def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): + if not annotate.__closure__: + return None + freevars = annotate.__code__.co_freevars + new_closure = [] + for i, cell in enumerate(annotate.__closure__): + if i < len(freevars): + name = freevars[i] + else: + name = "__cell__" + new_cell = None + if allow_evaluation: + try: + cell.cell_contents + except ValueError: + pass + else: + new_cell = cell + if new_cell is None: + fwdref = _Stringifier( + name, + cell=cell, + owner=owner, + globals=annotate.__globals__, + is_class=is_class, + stringifier_dict=stringifier_dict, + ) + stringifier_dict.stringifiers.append(fwdref) + new_cell = types.CellType(fwdref) + new_closure.append(new_cell) + return tuple(new_closure) -def get_annotate_function(obj): - """Get the __annotate__ function for an object. +def _stringify_single(anno): + if anno is ...: + return "..." + # We have to handle str specially to support PEP 563 stringified annotations. + elif isinstance(anno, str): + return anno + elif isinstance(anno, _Template): + return ast.unparse(_template_to_ast(anno)) + else: + return repr(anno) - obj may be a function, class, or module, or a user-defined type with - an `__annotate__` attribute. - Returns the __annotate__ function or None. +def get_annotate_from_class_namespace(obj): + """Retrieve the annotate function from a class namespace dictionary. + + Return None if the namespace does not contain an annotate function. + This is useful in metaclass ``__new__`` methods to retrieve the annotate function. """ - if isinstance(obj, type): - try: - return _BASE_GET_ANNOTATE(obj) - except AttributeError: - # AttributeError is raised for static types. 
- return None - return getattr(obj, "__annotate__", None) + try: + return obj["__annotate__"] + except KeyError: + return obj.get("__annotate_func__", None) def get_annotations( @@ -649,12 +838,18 @@ def get_annotations( ): """Compute the annotations dict for an object. - obj may be a callable, class, or module. - Passing in an object of any other type raises TypeError. + obj may be a callable, class, module, or other object with + __annotate__ or __annotations__ attributes. + Passing any other object raises TypeError. - Returns a dict. get_annotations() returns a new dict every time - it's called; calling it twice on the same object will return two - different but equivalent dicts. + The *format* parameter controls the format in which annotations are returned, + and must be a member of the Format enum or its integer equivalent. + For the VALUE format, the __annotations__ is tried first; if it + does not exist, the __annotate__ function is called. The + FORWARDREF format uses __annotations__ if it exists and can be + evaluated, and otherwise falls back to calling the __annotate__ function. + The SOURCE format tries __annotate__ first, and falls back to + using __annotations__, stringified using annotations_to_string(). 
This function handles several details for you: @@ -696,37 +891,48 @@ def get_annotations( match format: case Format.VALUE: - # For VALUE, we only look at __annotations__ + # For VALUE, we first look at __annotations__ ann = _get_dunder_annotations(obj) + + # If it's not there, try __annotate__ instead + if ann is None: + ann = _get_and_call_annotate(obj, format) case Format.FORWARDREF: # For FORWARDREF, we use __annotations__ if it exists try: - return dict(_get_dunder_annotations(obj)) - except NameError: + ann = _get_dunder_annotations(obj) + except Exception: pass + else: + if ann is not None: + return dict(ann) # But if __annotations__ threw a NameError, we try calling __annotate__ ann = _get_and_call_annotate(obj, format) - if ann is not None: - return ann - - # If that didn't work either, we have a very weird object: evaluating - # __annotations__ threw NameError and there is no __annotate__. In that case, - # we fall back to trying __annotations__ again. - return dict(_get_dunder_annotations(obj)) + if ann is None: + # If that didn't work either, we have a very weird object: evaluating + # __annotations__ threw NameError and there is no __annotate__. In that case, + # we fall back to trying __annotations__ again. + ann = _get_dunder_annotations(obj) case Format.STRING: # For STRING, we try to call __annotate__ ann = _get_and_call_annotate(obj, format) if ann is not None: - return ann + return dict(ann) # But if we didn't get it, we use __annotations__ instead. 
ann = _get_dunder_annotations(obj) - return annotations_to_string(ann) + if ann is not None: + return annotations_to_string(ann) case Format.VALUE_WITH_FAKE_GLOBALS: raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") case _: raise ValueError(f"Unsupported format {format!r}") + if ann is None: + if isinstance(obj, type) or callable(obj): + return {} + raise TypeError(f"{obj!r} does not have annotations") + if not ann: return {} @@ -755,10 +961,8 @@ def get_annotations( obj_globals = getattr(obj, "__globals__", None) obj_locals = None unwrap = obj - elif ann is not None: - obj_globals = obj_locals = unwrap = None else: - raise TypeError(f"{obj!r} is not a module, class, or callable.") + obj_globals = obj_locals = unwrap = None if unwrap is not None: while True: @@ -793,55 +997,60 @@ def get_annotations( return return_value -def value_to_string(value): +def type_repr(value): """Convert a Python value to a format suitable for use with the STRING format. - This is inteded as a helper for tools that support the STRING format but do + This is intended as a helper for tools that support the STRING format but do not have access to the code that originally produced the annotations. It uses repr() for most objects. """ - if isinstance(value, type): + if isinstance(value, (type, types.FunctionType, types.BuiltinFunctionType)): if value.__module__ == "builtins": return value.__qualname__ return f"{value.__module__}.{value.__qualname__}" + elif isinstance(value, _Template): + tree = _template_to_ast(value) + return ast.unparse(tree) if value is ...: return "..." - if isinstance(value, (types.FunctionType, types.BuiltinFunctionType)): - return value.__name__ return repr(value) def annotations_to_string(annotations): - """Convert an annotation dict containing values to approximately the STRING format.""" + """Convert an annotation dict containing values to approximately the STRING format. + + Always returns a fresh a dictionary. 
+ """ return { - n: t if isinstance(t, str) else value_to_string(t) + n: t if isinstance(t, str) else type_repr(t) for n, t in annotations.items() } def _get_and_call_annotate(obj, format): - annotate = get_annotate_function(obj) + """Get the __annotate__ function and call it. + + May not return a fresh dictionary. + """ + annotate = getattr(obj, "__annotate__", None) if annotate is not None: ann = call_annotate_function(annotate, format, owner=obj) if not isinstance(ann, dict): raise ValueError(f"{obj!r}.__annotate__ returned a non-dict") - return dict(ann) + return ann return None def _get_dunder_annotations(obj): - if isinstance(obj, type): - try: - ann = _BASE_GET_ANNOTATIONS(obj) - except AttributeError: - # For static types, the descriptor raises AttributeError. - return {} - else: - ann = getattr(obj, "__annotations__", None) - if ann is None: - return {} + """Return the annotations for an object, checking that it is a dictionary. + + Does not return a fresh dictionary. + """ + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None if not isinstance(ann, dict): raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") - return dict(ann) + return ann diff --git a/Lib/argparse.py b/Lib/argparse.py index d24fa72e573..d1a6350c3fd 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -161,18 +161,21 @@ class HelpFormatter(object): provided by the class are considered an implementation detail. 
""" - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - + def __init__( + self, + prog, + indent_increment=2, + max_help_position=24, + width=None, + color=False, + ): # default setting for width if width is None: import shutil width = shutil.get_terminal_size().columns width -= 2 + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -189,9 +192,20 @@ def __init__(self, self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = lambda text: text + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -226,7 +240,11 @@ def format_help(self): if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent heading_text = _('%(heading)s:') % dict(heading=self.heading) - heading = '%*s%s\n' % (current_indent, '', heading_text) + t = self.formatter._theme + heading = ( + f'{" " * current_indent}' + f'{t.heading}{heading_text}{t.reset}\n' + ) else: heading = '' @@ -239,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -282,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -295,16 +315,23 @@ def 
_join_parts(self, part_strings): if part and part is not SUPPRESS]) def _format_usage(self, usage, actions, groups, prefix): + t = self._theme + if prefix is None: prefix = _('usage: ') # if usage is specified, use that if usage is not None: - usage = usage % dict(prog=self._prog) + usage = ( + t.prog_extra + + usage + % {"prog": f"{t.prog}{self._prog}{t.reset}{t.prog_extra}"} + + t.reset + ) # if no optionals or positionals are available, usage is just prog elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) + usage = f"{t.prog}{self._prog}{t.reset}" # if optionals and positionals are available, calculate usage elif usage is None: @@ -326,7 +353,7 @@ def _format_usage(self, usage, actions, groups, prefix): # wrap the usage parts if it's too long text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: + if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts opt_parts = self._get_actions_usage_parts(optionals, groups) @@ -342,12 +369,13 @@ def get_lines(parts, indent, prefix=None): else: line_len = indent_length - 1 for part in parts: - if line_len + 1 + len(part) > text_width and line: + part_len = len(self._decolor(part)) + if line_len + 1 + part_len > text_width and line: lines.append(indent + ' '.join(line)) line = [] line_len = indent_length - 1 line.append(part) - line_len += len(part) + 1 + line_len += part_len + 1 if line: lines.append(indent + ' '.join(line)) if prefix is not None: @@ -355,8 +383,9 @@ def get_lines(parts, indent, prefix=None): return lines # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) + prog_len = len(self._decolor(prog)) + if len(prefix) + prog_len <= 0.75 * text_width: + indent = ' ' * (len(prefix) + prog_len + 1) if opt_parts: lines = get_lines([prog] + opt_parts, indent, prefix) lines.extend(get_lines(pos_parts, indent)) @@ 
-379,12 +408,18 @@ def get_lines(parts, indent, prefix=None): # join lines into usage usage = '\n'.join(lines) + usage = usage.removeprefix(prog) + usage = f"{t.prog}{prog}{t.reset}{usage}" + # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) + return f'{t.usage}{prefix}{t.reset}{usage}\n\n' def _format_actions_usage(self, actions, groups): return ' '.join(self._get_actions_usage_parts(actions, groups)) + def _is_long_option(self, string): + return len(string) > 2 + def _get_actions_usage_parts(self, actions, groups): # find group indices and identify actions in groups group_actions = set() @@ -408,6 +443,7 @@ def _get_actions_usage_parts(self, actions, groups): # collect all actions format strings parts = [] + t = self._theme for action in actions: # suppressed arguments are marked with None @@ -417,7 +453,11 @@ def _get_actions_usage_parts(self, actions, groups): # produce all arg strings elif not action.option_strings: default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) + part = ( + t.summary_action + + self._format_args(action, default) + + t.reset + ) # if it's in a group, strip the outer [] if action in group_actions: @@ -427,18 +467,26 @@ def _get_actions_usage_parts(self, actions, groups): # produce the first way to invoke the option in brackets else: option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option # if the Optional doesn't take a value, format is: # -s or --long if action.nargs == 0: part = action.format_usage() + part = f"{option_color}{part}{t.reset}" # if the Optional takes a value, format is: # -s ARGS or --long ARGS else: default = self._get_default_metavar_for_optional(action) args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) # 
make it look optional if it's not required or in a group if not action.required and action not in group_actions: @@ -485,6 +533,7 @@ def _format_action(self, action): help_width = max(self._width - help_position, 11) action_width = help_position - self._current_indent - 2 action_header = self._format_action_invocation(action) + action_header_no_color = self._decolor(action_header) # no help; start on same line and add a final newline if not action.help: @@ -492,9 +541,15 @@ def _format_action(self, action): action_header = '%*s%s\n' % tup # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header + elif len(action_header_no_color) <= action_width: + # calculate widths without color codes + action_header_color = action_header + tup = self._current_indent, '', action_width, action_header_no_color action_header = '%*s%-*s ' % tup + # swap in the colored header + action_header = action_header.replace( + action_header_no_color, action_header_color + ) indent_first = 0 # long action name; start on the next line @@ -527,23 +582,42 @@ def _format_action(self, action): return self._join_parts(parts) def _format_action_invocation(self, action): + t = self._theme + if not action.option_strings: default = self._get_default_metavar_for_positional(action) - return ' '.join(self._metavar_formatter(action, default)(1)) + return ( + t.action + + ' '.join(self._metavar_formatter(action, default)(1)) + + t.reset + ) else: + def color_option_strings(strings): + parts = [] + for s in strings: + if self._is_long_option(s): + parts.append(f"{t.long_option}{s}{t.reset}") + else: + parts.append(f"{t.short_option}{s}{t.reset}") + return parts + # if the Optional doesn't take a value, format is: # -s, --long if action.nargs == 0: - return ', '.join(action.option_strings) + option_strings = color_option_strings(action.option_strings) + return ', '.join(option_strings) # if the Optional 
takes a value, format is: # -s, --long ARGS else: default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - return ', '.join(action.option_strings) + ' ' + args_string + option_strings = color_option_strings(action.option_strings) + args_string = ( + f"{t.label}{self._format_args(action, default)}{t.reset}" + ) + return ', '.join(option_strings) + ' ' + args_string def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: @@ -1157,6 +1231,7 @@ def __init__(self, self._name_parser_map = {} self._choices_actions = [] self._deprecated = set() + self._color = False super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1172,6 +1247,10 @@ def add_parser(self, name, *, deprecated=False, **kwargs): if kwargs.get('prog') is None: kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + # set color + if kwargs.get('color') is None: + kwargs['color'] = self._color + aliases = kwargs.pop('aliases', ()) if name in self._name_parser_map: @@ -1391,6 +1470,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1401,6 +1481,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1420,6 +1501,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) 
@@ -1776,7 +1858,8 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - exit_on_error -- Determines whether or not ArgumentParser exits with error info when an error occurs - suggest_on_error - Enables suggestions for mistyped argument choices - and subparser names. (default: ``False``) + and subparser names (default: ``False``) + - color - Allow color output in help messages (default: ``False``) """ def __init__(self, @@ -1793,8 +1876,10 @@ def __init__(self, add_help=True, allow_abbrev=True, exit_on_error=True, - suggest_on_error=False): - + *, + suggest_on_error=False, + color=False, + ): superinit = super(ArgumentParser, self).__init__ superinit(description=description, prefix_chars=prefix_chars, @@ -1810,6 +1895,7 @@ def __init__(self, self.allow_abbrev = allow_abbrev self.exit_on_error = exit_on_error self.suggest_on_error = suggest_on_error + self.color = color add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) @@ -1841,6 +1927,7 @@ def identity(string): # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1855,6 +1942,7 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: raise ValueError('cannot have multiple subparser arguments') @@ -1881,6 +1969,7 @@ def add_subparsers(self, **kwargs): # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + action._color = self.color self._check_help(action) self._subparsers._add_action(action) @@ -1907,6 +1996,7 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv 
= self.parse_known_args(args, namespace) if argv: @@ -2501,6 +2591,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # ======================== # Value conversion methods # ======================== + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2600,6 +2691,7 @@ def _check_value(self, action, value): # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2630,11 +2722,14 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2656,6 +2751,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) diff --git a/Lib/ast.py b/Lib/ast.py index 507fec5f2d3..b9791bf52d3 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1,28 +1,24 @@ """ - ast - ~~~ +The `ast` module helps Python applications to process trees of the Python +abstract syntax grammar. The abstract syntax itself might change with +each Python release; this module helps to find out programmatically what +the current grammar looks like and allows modifications of it. - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. 
+An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as +a flag to the `compile()` builtin function or by using the `parse()` +function from this module. The result will be a tree of objects whose +classes all inherit from `ast.AST`. - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. +A modified abstract syntax tree can be compiled into a Python code object +using the built-in `compile()` function. - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. +Additionally various helper functions are provided that make working with +the trees simpler. The main intention of the helper functions and this +module in general is to provide an easy to use interface for libraries +that work tightly with the python syntax (template engines for example). - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. +:copyright: Copyright 2008 by Armin Ronacher. +:license: Python License. 
""" from _ast import * @@ -630,11 +626,11 @@ def unparse(ast_obj): return unparser.visit(ast_obj) -def main(): +def main(args=None): import argparse import sys - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', @@ -647,7 +643,16 @@ def main(): 'column offsets') parser.add_argument('-i', '--indent', type=int, default=3, help='indentation of nodes (number of spaces)') - args = parser.parse_args() + parser.add_argument('--feature-version', + type=str, default=None, metavar='VERSION', + help='Python version in the format 3.x ' + '(for example, 3.10)') + parser.add_argument('-O', '--optimize', + type=int, default=-1, metavar='LEVEL', + help='optimization level for parser (default -1)') + parser.add_argument('--show-empty', default=False, action='store_true', + help='show empty lists and fields in dump output') + args = parser.parse_args(args) if args.infile == '-': name = '' @@ -656,8 +661,22 @@ def main(): name = args.infile with open(args.infile, 'rb') as infile: source = infile.read() - tree = parse(source, name, args.mode, type_comments=args.no_type_comments) - print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) + + # Process feature_version + feature_version = None + if args.feature_version: + try: + major, minor = map(int, args.feature_version.split('.', 1)) + except ValueError: + parser.error('Invalid format for --feature-version; ' + 'expected format 3.x (for example, 3.10)') + + feature_version = (major, minor) + + tree = parse(source, name, args.mode, type_comments=args.no_type_comments, + feature_version=feature_version, optimize=args.optimize) + print(dump(tree, include_attributes=args.include_attributes, + indent=args.indent, show_empty=args.show_empty)) if __name__ == '__main__': main() diff --git a/Lib/asyncio/__main__.py 
b/Lib/asyncio/__main__.py index e624f7632be..21ca5c5f62a 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -1,5 +1,7 @@ +import argparse import ast import asyncio +import asyncio.tools import concurrent.futures import contextvars import inspect @@ -10,7 +12,7 @@ import types import warnings -from _colorize import can_colorize, ANSIColors # type: ignore[import-not-found] +from _colorize import get_theme from _pyrepl.console import InteractiveColoredConsole from . import futures @@ -101,8 +103,9 @@ def run(self): exec(startup_code, console.locals) ps1 = getattr(sys, "ps1", ">>> ") - if can_colorize() and CAN_USE_PYREPL: - ps1 = f"{ANSIColors.BOLD_MAGENTA}{ps1}{ANSIColors.RESET}" + if CAN_USE_PYREPL: + theme = get_theme().syntax + ps1 = f"{theme.prompt}{ps1}{theme.reset}" console.write(f"{ps1}import asyncio\n") if CAN_USE_PYREPL: @@ -140,6 +143,37 @@ def interrupt(self) -> None: if __name__ == '__main__': + parser = argparse.ArgumentParser( + prog="python3 -m asyncio", + description="Interactive asyncio shell and CLI tools", + color=True, + ) + subparsers = parser.add_subparsers(help="sub-commands", dest="command") + ps = subparsers.add_parser( + "ps", help="Display a table of all pending tasks in a process" + ) + ps.add_argument("pid", type=int, help="Process ID to inspect") + pstree = subparsers.add_parser( + "pstree", help="Display a tree of all pending tasks in a process" + ) + pstree.add_argument("pid", type=int, help="Process ID to inspect") + args = parser.parse_args() + match args.command: + case "ps": + asyncio.tools.display_awaited_by_tasks_table(args.pid) + sys.exit(0) + case "pstree": + asyncio.tools.display_awaited_by_tasks_tree(args.pid) + sys.exit(0) + case None: + pass # continue to the interactive shell + case _: + # shouldn't happen as an invalid command-line wouldn't parse + # but let's keep it for the next person adding a command + print(f"error: unhandled command {args.command}", file=sys.stderr) + 
parser.print_usage(file=sys.stderr) + sys.exit(1) + sys.audit("cpython.run_stdin") if os.getenv('PYTHON_BASIC_REPL'): @@ -149,7 +183,7 @@ def interrupt(self) -> None: return_code = 0 loop = asyncio.new_event_loop() - asyncio._set_event_loop(loop) + asyncio.set_event_loop(loop) repl_locals = {'asyncio': asyncio} for key in {'__name__', '__package__', diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index ff63639638c..04fb961e998 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -459,7 +459,7 @@ def create_future(self): return futures.Future(loop=self) def create_task(self, coro, **kwargs): - """Schedule a coroutine object. + """Schedule or begin executing a coroutine object. Return a task object. """ @@ -1666,8 +1666,7 @@ async def connect_accepted_socket( raise ValueError( 'ssl_shutdown_timeout is only meaningful with ssl') - if sock is not None: - _check_ssl_socket(sock) + _check_ssl_socket(sock) transport, protocol = await self._create_connection_transport( sock, protocol_factory, ssl, '', server_side=True, @@ -1880,6 +1879,8 @@ def call_exception_handler(self, context): - 'protocol' (optional): Protocol instance; - 'transport' (optional): Transport instance; - 'socket' (optional): Socket instance; + - 'source_traceback' (optional): Traceback of the source; + - 'handle_traceback' (optional): Traceback of the handle; - 'asyncgen' (optional): Asynchronous generator that caused the exception. diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 9c2ba679ce2..d40af422e61 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -104,7 +104,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. 
when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 2e45b4fe6fa..2913f901dca 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -15,7 +15,6 @@ "_set_event_loop_policy", "set_event_loop_policy", "get_event_loop", - "_set_event_loop", "set_event_loop", "new_event_loop", "_set_running_loop", @@ -835,13 +834,9 @@ def get_event_loop(): return _get_event_loop_policy().get_event_loop() -def _set_event_loop(loop): - _get_event_loop_policy().set_event_loop(loop) - def set_event_loop(loop): """Equivalent to calling get_event_loop_policy().set_event_loop(loop).""" - warnings._deprecated('asyncio.set_event_loop', remove=(3,16)) - _set_event_loop(loop) + _get_event_loop_policy().set_event_loop(loop) def new_event_loop(): diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index d1df6707302..6bd00a64478 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -351,22 +351,19 @@ def _set_concurrent_future_state(concurrent, source): def _copy_future_state(source, dest): """Internal helper to copy state from another Future. - The other Future may be a concurrent.futures.Future. + The other Future must be a concurrent.futures.Future. """ - assert source.done() if dest.cancelled(): return assert not dest.done() - if source.cancelled(): + done, cancelled, result, exception = source._get_snapshot() + assert done + if cancelled: dest.cancel() + elif exception is not None: + dest.set_exception(_convert_future_exc(exception)) else: - exception = source.exception() - if exception is not None: - dest.set_exception(_convert_future_exc(exception)) - else: - result = source.result() - dest.set_result(result) - + dest.set_result(result) def _chain_future(source, destination): """Chain two futures so that when one completes, so does the other. 
diff --git a/Lib/asyncio/graph.py b/Lib/asyncio/graph.py index d8df7c9919a..b5bfeb1630a 100644 --- a/Lib/asyncio/graph.py +++ b/Lib/asyncio/graph.py @@ -1,6 +1,7 @@ """Introspection utils for tasks call graphs.""" import dataclasses +import io import sys import types @@ -16,9 +17,6 @@ 'FutureCallGraph', ) -if False: # for type checkers - from typing import TextIO - # Sadly, we can't re-use the traceback module's datastructures as those # are tailored for error reporting, whereas we need to represent an # async call graph. @@ -270,7 +268,7 @@ def print_call_graph( future: futures.Future | None = None, /, *, - file: TextIO | None = None, + file: io.Writer[str] | None = None, depth: int = 1, limit: int | None = None, ) -> None: diff --git a/Lib/asyncio/runners.py b/Lib/asyncio/runners.py index 14397b4ad0c..ba37e003a65 100644 --- a/Lib/asyncio/runners.py +++ b/Lib/asyncio/runners.py @@ -74,7 +74,7 @@ def close(self): loop.shutdown_default_executor(constants.THREAD_JOIN_TIMEOUT)) finally: if self._set_event_loop: - events._set_event_loop(None) + events.set_event_loop(None) loop.close() self._loop = None self._state = _State.CLOSED @@ -147,7 +147,7 @@ def _lazy_init(self): if not self._set_event_loop: # Call set_event_loop only once to avoid calling # attach_loop multiple times on child watchers - events._set_event_loop(self._loop) + events.set_event_loop(self._loop) self._set_event_loop = True else: self._loop = self._loop_factory() diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 22147451fa7..6ad84044adf 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -173,7 +173,7 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. 
- for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index 1633478d1c8..00e8f6d5d1a 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -179,7 +179,7 @@ async def _aexit(self, et, exc): exc = None - def create_task(self, coro, *, name=None, context=None): + def create_task(self, coro, **kwargs): """Create a new task in this group and return it. Similar to `asyncio.create_task`. @@ -193,10 +193,7 @@ def create_task(self, coro, *, name=None, context=None): if self._aborting: coro.close() raise RuntimeError(f"TaskGroup {self!r} is shutting down") - if context is None: - task = self._loop.create_task(coro, name=name) - else: - task = self._loop.create_task(coro, name=name, context=context) + task = self._loop.create_task(coro, **kwargs) futures.future_add_to_awaited_by(task, self._parent_task) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 825e91f5594..fbd5c39a7c5 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -386,19 +386,13 @@ def __wakeup(self, future): Task = _CTask = _asyncio.Task -def create_task(coro, *, name=None, context=None): +def create_task(coro, **kwargs): """Schedule the execution of a coroutine object in a spawn task. Return a Task object. """ loop = events.get_running_loop() - if context is None: - # Use legacy API if context is not needed - task = loop.create_task(coro, name=name) - else: - task = loop.create_task(coro, name=name, context=context) - - return task + return loop.create_task(coro, **kwargs) # wait() and as_completed() similar to those in PEP 3148. 
@@ -914,6 +908,25 @@ def _done_callback(fut, cur_task=cur_task): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -959,14 +972,11 @@ def shield(arg): else: cur_task = None - def _inner_done_callback(inner, cur_task=cur_task): - if cur_task is not None: - futures.future_discard_from_awaited_by(inner, cur_task) + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -978,10 +988,16 @@ def _inner_done_callback(inner, cur_task=cur_task): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) @@ -1030,9 +1046,9 @@ def create_eager_task_factory(custom_task_constructor): used. E.g. `loop.set_task_factory(asyncio.eager_task_factory)`. 
""" - def factory(loop, coro, *, name=None, context=None): + def factory(loop, coro, *, eager_start=True, **kwargs): return custom_task_constructor( - coro, loop=loop, name=name, context=context, eager_start=True) + coro, loop=loop, eager_start=eager_start, **kwargs) return factory diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py new file mode 100644 index 00000000000..b2da7d2f6ba --- /dev/null +++ b/Lib/asyncio/tools.py @@ -0,0 +1,228 @@ +"""Tools to analyze tasks running in asyncio programs.""" + +from dataclasses import dataclass +from collections import defaultdict +from itertools import count +from enum import Enum +import sys +from _remote_debugging import get_all_awaited_by + + +class NodeType(Enum): + COROUTINE = 1 + TASK = 2 + + +class CycleFoundException(Exception): + """Raised when there is a cycle when drawing the call tree.""" + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name + + + +# ─── indexing helpers ─────────────────────────────────────────── +def _format_stack_entry(elem: tuple[str, str, int] | str) -> str: + if isinstance(elem, tuple): + fqname, path, line_no = elem + return f"{fqname} {path}:{line_no}" + + return elem + + +def _index(result): + id2name, awaits = {}, [] + for _thr_id, tasks in result: + for tid, tname, awaited in tasks: + id2name[tid] = tname + for stack, parent_id in awaited: + stack = [_format_stack_entry(elem) for elem in stack] + awaits.append((parent_id, stack, tid)) + return id2name, awaits + + +def _build_tree(id2name, awaits): + id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} + children = defaultdict(list) + cor_names = defaultdict(dict) # (parent) -> {frame: node} + cor_id_seq = count(1) + + def _cor_node(parent_key, frame_name): + """Return an existing or new (NodeType.COROUTINE, …) node under *parent_key*.""" + bucket = cor_names[parent_key] + if frame_name in 
bucket: + return bucket[frame_name] + node_key = (NodeType.COROUTINE, f"c{next(cor_id_seq)}") + id2label[node_key] = frame_name + children[parent_key].append(node_key) + bucket[frame_name] = node_key + return node_key + + # lay down parent ➜ …frames… ➜ child paths + for parent_id, stack, child_id in awaits: + cur = (NodeType.TASK, parent_id) + for frame in reversed(stack): # outer-most → inner-most + cur = _cor_node(cur, frame) + child_key = (NodeType.TASK, child_id) + if child_key not in children[cur]: + children[cur].append(child_key) + + return id2label, children + + +def _roots(id2label, children): + all_children = {c for kids in children.values() for c in kids} + return [n for n in id2label if n not in all_children] + +# ─── detect cycles in the task-to-task graph ─────────────────────── +def _task_graph(awaits): + """Return {parent_task_id: {child_task_id, …}, …}.""" + g = defaultdict(set) + for parent_id, _stack, child_id in awaits: + g[parent_id].add(child_id) + return g + + +def _find_cycles(graph): + """ + Depth-first search for back-edges. + + Returns a list of cycles (each cycle is a list of task-ids) or an + empty list if the graph is acyclic. + """ + WHITE, GREY, BLACK = 0, 1, 2 + color = defaultdict(lambda: WHITE) + path, cycles = [], [] + + def dfs(v): + color[v] = GREY + path.append(v) + for w in graph.get(v, ()): + if color[w] == WHITE: + dfs(w) + elif color[w] == GREY: # back-edge → cycle! + i = path.index(w) + cycles.append(path[i:] + [w]) # make a copy + color[v] = BLACK + path.pop() + + for v in list(graph): + if color[v] == WHITE: + dfs(v) + return cycles + + +# ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def build_async_tree(result, task_emoji="(T)", cor_emoji=""): + """ + Build a list of strings for pretty-print an async call tree. + + The call tree is produced by `get_all_async_stacks()`, prefixing tasks + with `task_emoji` and coroutine frames with `cor_emoji`. 
+ """ + id2name, awaits = _index(result) + g = _task_graph(awaits) + cycles = _find_cycles(g) + if cycles: + raise CycleFoundException(cycles, id2name) + labels, children = _build_tree(id2name, awaits) + + def pretty(node): + flag = task_emoji if node[0] == NodeType.TASK else cor_emoji + return f"{flag} {labels[node]}" + + def render(node, prefix="", last=True, buf=None): + if buf is None: + buf = [] + buf.append(f"{prefix}{'└── ' if last else '├── '}{pretty(node)}") + new_pref = prefix + (" " if last else "│ ") + kids = children.get(node, []) + for i, kid in enumerate(kids): + render(kid, new_pref, i == len(kids) - 1, buf) + return buf + + return [render(root) for root in _roots(labels, children)] + + +def build_task_table(result): + id2name, awaits = _index(result) + table = [] + for tid, tasks in result: + for task_id, task_name, awaited in tasks: + if not awaited: + table.append( + [ + tid, + hex(task_id), + task_name, + "", + "", + "0x0" + ] + ) + for stack, awaiter_id in awaited: + stack = [elem[0] if isinstance(elem, tuple) else elem for elem in stack] + coroutine_chain = " -> ".join(stack) + awaiter_name = id2name.get(awaiter_id, "Unknown") + table.append( + [ + tid, + hex(task_id), + task_name, + coroutine_chain, + awaiter_name, + hex(awaiter_id), + ] + ) + + return table + +def _print_cycle_exception(exception: CycleFoundException): + print("ERROR: await-graph contains cycles - cannot print a tree!", file=sys.stderr) + print("", file=sys.stderr) + for c in exception.cycles: + inames = " → ".join(exception.id2name.get(tid, hex(tid)) for tid in c) + print(f"cycle: {inames}", file=sys.stderr) + + +def _get_awaited_by_tasks(pid: int) -> list: + try: + return get_all_awaited_by(pid) + except RuntimeError as e: + while e.__context__ is not None: + e = e.__context__ + print(f"Error retrieving tasks: {e}") + sys.exit(1) + + +def display_awaited_by_tasks_table(pid: int) -> None: + """Build and print a table of all pending tasks under `pid`.""" + + tasks = 
_get_awaited_by_tasks(pid) + table = build_task_table(tasks) + # Print the table in a simple tabular format + print( + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine chain':<50} {'awaiter name':<20} {'awaiter id':<15}" + ) + print("-" * 135) + for row in table: + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<20} {row[5]:<15}") + + +def display_awaited_by_tasks_tree(pid: int) -> None: + """Build and print a tree of all pending tasks under `pid`.""" + + tasks = _get_awaited_by_tasks(pid) + try: + result = build_async_tree(tasks) + except CycleFoundException as e: + _print_cycle_exception(e) + sys.exit(1) + + for tree in result: + print("\n".join(tree)) diff --git a/Lib/bdb.py b/Lib/bdb.py index ba5cacc2a54..4290ef22302 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -58,12 +58,11 @@ def start_trace(self, tracefunc): E = sys.monitoring.events all_events = 0 for event, cb_name in self.EVENT_CALLBACK_MAP.items(): - callback = getattr(self, f'{cb_name}_callback') + callback = self.callback_wrapper(getattr(self, f'{cb_name}_callback'), event) sys.monitoring.register_callback(self._tool_id, event, callback) if event != E.INSTRUCTION: all_events |= event - self.check_trace_func() - self.check_trace_opcodes() + self.update_local_events() sys.monitoring.set_events(self._tool_id, self.GLOBAL_EVENTS) self._enabled = True @@ -74,7 +73,6 @@ def stop_trace(self): if curr_tool != self._name: return sys.monitoring.clear_tool_id(self._tool_id) - self.check_trace_opcodes() sys.monitoring.free_tool_id(self._tool_id) def disable_current_event(self): @@ -84,19 +82,22 @@ def restart_events(self): if sys.monitoring.get_tool(self._tool_id) == self._name: sys.monitoring.restart_events() - def callback_wrapper(func): + def callback_wrapper(self, func, event): import functools @functools.wraps(func) - def wrapper(self, *args): + def wrapper(*args): if self._tracing_thread != threading.current_thread(): return try: frame = sys._getframe().f_back - ret = 
func(self, frame, *args) + ret = func(frame, *args) if self._enabled and frame.f_trace: - self.check_trace_func() - if self._disable_current_event: + self.update_local_events() + if ( + self._disable_current_event + and event not in (E.PY_THROW, E.PY_UNWIND, E.RAISE) + ): return sys.monitoring.DISABLE else: return ret @@ -109,7 +110,6 @@ def wrapper(self, *args): return wrapper - @callback_wrapper def call_callback(self, frame, code, *args): local_tracefunc = self._tracefunc(frame, 'call', None) if local_tracefunc is not None: @@ -117,22 +117,18 @@ def call_callback(self, frame, code, *args): if self._enabled: sys.monitoring.set_local_events(self._tool_id, code, self.LOCAL_EVENTS) - @callback_wrapper def return_callback(self, frame, code, offset, retval): if frame.f_trace: frame.f_trace(frame, 'return', retval) - @callback_wrapper def unwind_callback(self, frame, code, *args): if frame.f_trace: frame.f_trace(frame, 'return', None) - @callback_wrapper def line_callback(self, frame, code, *args): if frame.f_trace and frame.f_trace_lines: frame.f_trace(frame, 'line', None) - @callback_wrapper def jump_callback(self, frame, code, inst_offset, dest_offset): if dest_offset > inst_offset: return sys.monitoring.DISABLE @@ -143,7 +139,6 @@ def jump_callback(self, frame, code, inst_offset, dest_offset): if frame.f_trace and frame.f_trace_lines: frame.f_trace(frame, 'line', None) - @callback_wrapper def exception_callback(self, frame, code, offset, exc): if frame.f_trace: if exc.__traceback__ and hasattr(exc.__traceback__, 'tb_frame'): @@ -154,32 +149,22 @@ def exception_callback(self, frame, code, offset, exc): tb = tb.tb_next frame.f_trace(frame, 'exception', (type(exc), exc, exc.__traceback__)) - @callback_wrapper def opcode_callback(self, frame, code, offset): if frame.f_trace and frame.f_trace_opcodes: frame.f_trace(frame, 'opcode', None) - def check_trace_opcodes(self, frame=None): - if frame is None: - frame = sys._getframe().f_back - while frame is not None: - 
self.set_trace_opcodes(frame, frame.f_trace_opcodes) - frame = frame.f_back - - def set_trace_opcodes(self, frame, trace_opcodes): + def update_local_events(self, frame=None): if sys.monitoring.get_tool(self._tool_id) != self._name: return - if trace_opcodes: - sys.monitoring.set_local_events(self._tool_id, frame.f_code, E.INSTRUCTION) - else: - sys.monitoring.set_local_events(self._tool_id, frame.f_code, 0) - - def check_trace_func(self, frame=None): if frame is None: frame = sys._getframe().f_back while frame is not None: if frame.f_trace is not None: - sys.monitoring.set_local_events(self._tool_id, frame.f_code, self.LOCAL_EVENTS) + if frame.f_trace_opcodes: + events = self.LOCAL_EVENTS | E.INSTRUCTION + else: + events = self.LOCAL_EVENTS + sys.monitoring.set_local_events(self._tool_id, frame.f_code, events) frame = frame.f_back def _get_lineno(self, code, offset): @@ -544,11 +529,11 @@ def _set_trace_opcodes(self, trace_opcodes): frame = self.enterframe while frame is not None: frame.f_trace_opcodes = trace_opcodes - if self.monitoring_tracer: - self.monitoring_tracer.set_trace_opcodes(frame, trace_opcodes) if frame is self.botframe: break frame = frame.f_back + if self.monitoring_tracer: + self.monitoring_tracer.update_local_events() def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False): """Set the attributes for stopping. 
@@ -642,8 +627,8 @@ def set_continue(self): frame = frame.f_back for frame, (trace_lines, trace_opcodes) in self.frame_trace_lines_opcodes.items(): frame.f_trace_lines, frame.f_trace_opcodes = trace_lines, trace_opcodes - if self.backend == 'monitoring': - self.monitoring_tracer.set_trace_opcodes(frame, trace_opcodes) + if self.backend == 'monitoring': + self.monitoring_tracer.update_local_events() self.frame_trace_lines_opcodes = {} def set_quit(self): diff --git a/Lib/bz2.py b/Lib/bz2.py index 2420cd01906..eb58f4da596 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -10,9 +10,9 @@ __author__ = "Nadeem Vawda " from builtins import open as _builtin_open +from compression._common import _streams import io import os -import _compression from _bz2 import BZ2Compressor, BZ2Decompressor @@ -23,7 +23,7 @@ _MODE_WRITE = 3 -class BZ2File(_compression.BaseStream): +class BZ2File(_streams.BaseStream): """A file object providing transparent bzip2 (de)compression. @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): raise TypeError("filename must be a str, bytes, file or PathLike object") if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, + raw = _streams.DecompressReader(self._fp, BZ2Decompressor, trailing_error=OSError) self._buffer = io.BufferedReader(raw) else: @@ -248,7 +248,7 @@ def writelines(self, seq): Line separators are not added between the written byte strings. """ - return _compression.BaseStream.writelines(self, seq) + return _streams.BaseStream.writelines(self, seq) def seek(self, offset, whence=io.SEEK_SET): """Change the file position. 
diff --git a/Lib/cProfile.py b/Lib/cProfile.py index e7c868b8d55..770d26f7962 100644 --- a/Lib/cProfile.py +++ b/Lib/cProfile.py @@ -6,6 +6,7 @@ import _lsprof import importlib.machinery +import importlib.util import io import profile as _pyprofile @@ -173,13 +174,22 @@ def main(): code = compile(fp.read(), progname, 'exec') spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, origin=progname) - globs = { + module = importlib.util.module_from_spec(spec) + # Set __main__ so that importing __main__ in the profiled code will + # return the same namespace that the code is executing under. + sys.modules['__main__'] = module + # Ensure that we're using the same __dict__ instance as the module + # for the global variables so that updates to globals are reflected + # in the module's namespace. + globs = module.__dict__ + globs.update({ '__spec__': spec, '__file__': spec.origin, '__name__': spec.name, '__package__': None, '__cached__': None, - } + }) + try: runctx(code, globs, None, options.outfile, options.sort) except BrokenPipeError as exc: diff --git a/Lib/calendar.py b/Lib/calendar.py index 01a76ff8e78..18f76d52ff8 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -810,7 +810,7 @@ def timegm(tuple): def main(args=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) textgroup = parser.add_argument_group('text only arguments') htmlgroup = parser.add_argument_group('html only arguments') textgroup.add_argument( diff --git a/Lib/cmd.py b/Lib/cmd.py index 438b88aa104..51495fb3216 100644 --- a/Lib/cmd.py +++ b/Lib/cmd.py @@ -273,7 +273,7 @@ def complete(self, text, state): endidx = readline.get_endidx() - stripped if begidx>0: cmd, args, foo = self.parseline(line) - if cmd == '': + if not cmd: compfunc = self.completedefault else: try: diff --git a/Lib/code.py b/Lib/code.py index 41331dfd071..b134886dc26 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -385,7 +385,7 @@ def interact(banner=None, 
readfunc=None, local=None, exitmsg=None, local_exit=Fa if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument('-q', action='store_true', help="don't print version and copyright messages") args = parser.parse_args() diff --git a/Lib/codecs.py b/Lib/codecs.py index e365e6cf229..fc38e922257 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -884,7 +884,6 @@ def __reduce_ex__(self, proto): ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): - """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -912,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. - """ + import warnings + warnings.warn("codecs.open() is deprecated. Use open() instead.", + DeprecationWarning, stacklevel=2) + if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode diff --git a/Lib/codeop.py b/Lib/codeop.py index adf000ba29f..8cac00442d9 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -47,7 +47,7 @@ PyCF_ONLY_AST = 0x400 PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 -def _maybe_compile(compiler, source, filename, symbol): +def _maybe_compile(compiler, source, filename, symbol, flags): # Check for source consisting of only blank lines and comments. for line in source.split("\n"): line = line.strip() @@ -61,10 +61,10 @@ def _maybe_compile(compiler, source, filename, symbol): with warnings.catch_warnings(): warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning)) try: - compiler(source, filename, symbol) + compiler(source, filename, symbol, flags=flags) except SyntaxError: # Let other compile() errors propagate. 
try: - compiler(source + "\n", filename, symbol) + compiler(source + "\n", filename, symbol, flags=flags) return None except _IncompleteInputError as e: return None @@ -74,14 +74,13 @@ def _maybe_compile(compiler, source, filename, symbol): return compiler(source, filename, symbol, incomplete_input=False) -def _compile(source, filename, symbol, incomplete_input=True): - flags = 0 +def _compile(source, filename, symbol, incomplete_input=True, *, flags=0): if incomplete_input: flags |= PyCF_ALLOW_INCOMPLETE_INPUT flags |= PyCF_DONT_IMPLY_DEDENT return compile(source, filename, symbol, flags) -def compile_command(source, filename="", symbol="single"): +def compile_command(source, filename="", symbol="single", flags=0): r"""Compile a command and determine whether it is incomplete. Arguments: @@ -100,7 +99,7 @@ def compile_command(source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). """ - return _maybe_compile(_compile, source, filename, symbol) + return _maybe_compile(_compile, source, filename, symbol, flags) class Compile: """Instances of this class behave much like the built-in compile @@ -152,4 +151,4 @@ def __call__(self, source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). 
""" - return _maybe_compile(self.compiler, source, filename, symbol) + return _maybe_compile(self.compiler, source, filename, symbol, flags=self.compiler.flags) diff --git a/Lib/compileall.py b/Lib/compileall.py index 47e2446356e..67fe370451e 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -317,7 +317,9 @@ def main(): import argparse parser = argparse.ArgumentParser( - description='Utilities to support installing Python libraries.') + description='Utilities to support installing Python libraries.', + color=True, + ) parser.add_argument('-l', action='store_const', const=0, default=None, dest='maxlevels', help="don't recurse into subdirectories") diff --git a/Lib/compression/__init__.py b/Lib/compression/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Lib/compression/_common/__init__.py b/Lib/compression/_common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Lib/_compression.py b/Lib/compression/_common/_streams.py similarity index 98% rename from Lib/_compression.py rename to Lib/compression/_common/_streams.py index e8b70aa0a3e..9f367d4e304 100644 --- a/Lib/_compression.py +++ b/Lib/compression/_common/_streams.py @@ -1,4 +1,4 @@ -"""Internal classes used by the gzip, lzma and bz2 modules""" +"""Internal classes used by compression modules""" import io import sys diff --git a/Lib/compression/bz2.py b/Lib/compression/bz2.py new file mode 100644 index 00000000000..16815d6cd20 --- /dev/null +++ b/Lib/compression/bz2.py @@ -0,0 +1,5 @@ +import bz2 +__doc__ = bz2.__doc__ +del bz2 + +from bz2 import * diff --git a/Lib/compression/gzip.py b/Lib/compression/gzip.py new file mode 100644 index 00000000000..552f48f948a --- /dev/null +++ b/Lib/compression/gzip.py @@ -0,0 +1,5 @@ +import gzip +__doc__ = gzip.__doc__ +del gzip + +from gzip import * diff --git a/Lib/compression/lzma.py b/Lib/compression/lzma.py new file mode 100644 index 00000000000..b4bc7ccb1db --- /dev/null +++ b/Lib/compression/lzma.py @@ 
-0,0 +1,5 @@ +import lzma +__doc__ = lzma.__doc__ +del lzma + +from lzma import * diff --git a/Lib/compression/zlib.py b/Lib/compression/zlib.py new file mode 100644 index 00000000000..3aa7e2db90e --- /dev/null +++ b/Lib/compression/zlib.py @@ -0,0 +1,5 @@ +import zlib +__doc__ = zlib.__doc__ +del zlib + +from zlib import * diff --git a/Lib/compression/zstd/__init__.py b/Lib/compression/zstd/__init__.py new file mode 100644 index 00000000000..84b25914b0a --- /dev/null +++ b/Lib/compression/zstd/__init__.py @@ -0,0 +1,242 @@ +"""Python bindings to the Zstandard (zstd) compression library (RFC-8878).""" + +__all__ = ( + # compression.zstd + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', + + # compression.zstd._zstdfile + 'open', + 'ZstdFile', + + # _zstd + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', +) + +import _zstd +import enum +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) +from compression.zstd._zstdfile import ZstdFile, open, _nbytes + +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT +"""The default compression level for Zstandard, currently '3'.""" + + +class FrameInfo: + """Information about a Zstandard frame.""" + + __slots__ = 'decompressed_size', 'dictionary_id' + + def __init__(self, decompressed_size, dictionary_id): + super().__setattr__('decompressed_size', decompressed_size) + super().__setattr__('dictionary_id', dictionary_id) + + def __repr__(self): + return (f'FrameInfo(decompressed_size={self.decompressed_size}, ' + 
f'dictionary_id={self.dictionary_id})') + + def __setattr__(self, name, _): + raise AttributeError(f"can't set attribute {name!r}") + + +def get_frame_info(frame_buffer): + """Get Zstandard frame information from a frame header. + + *frame_buffer* is a bytes-like object. It should start from the beginning + of a frame, and needs to include at least the frame header (6 to 18 bytes). + + The returned FrameInfo object has two attributes. + 'decompressed_size' is the size in bytes of the data in the frame when + decompressed, or None when the decompressed size is unknown. + 'dictionary_id' is an int in the range (0, 2**32). The special value 0 + means that the dictionary ID was not recorded in the frame header, + the frame may or may not need a dictionary to be decoded, + and the ID of such a dictionary is not specified. + """ + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) + + +def train_dict(samples, dict_size): + """Return a ZstdDict representing a trained Zstandard dictionary. + + *samples* is an iterable of samples, where a sample is a bytes-like + object representing a file. + + *dict_size* is the dictionary's maximum size, in bytes. + """ + if not isinstance(dict_size, int): + ds_cls = type(dict_size).__qualname__ + raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("samples contained no data; can't train dictionary.") + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) + return ZstdDict(dict_content) + + +def finalize_dict(zstd_dict, /, samples, dict_size, level): + """Return a ZstdDict representing a finalized Zstandard dictionary. + + Given a custom content as a basis for dictionary, and a set of samples, + finalize *zstd_dict* by adding headers and statistics according to the + Zstandard dictionary format. 
+ + You may compose an effective dictionary content by hand, which is used as + basis dictionary, and use some samples to finalize a dictionary. The basis + dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict. + + *samples* is an iterable of samples, where a sample is a bytes-like object + representing a file. + *dict_size* is the dictionary's maximum size, in bytes. + *level* is the expected compression level. The statistics for each + compression level differ, so tuning the dictionary to the compression level + can provide improvements. + """ + + if not isinstance(zstd_dict, ZstdDict): + raise TypeError('zstd_dict argument should be a ZstdDict object.') + if not isinstance(dict_size, int): + raise TypeError('dict_size argument should be an int object.') + if not isinstance(level, int): + raise TypeError('level argument should be an int object.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("The samples are empty content, can't finalize the " + "dictionary.") + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) + return ZstdDict(dict_content) + + +def compress(data, level=None, options=None, zstd_dict=None): + """Return Zstandard compressed *data* as bytes. + + *level* is an int specifying the compression level to use, defaulting to + COMPRESSION_LEVEL_DEFAULT ('3'). + *options* is a dict object that contains advanced compression + parameters. See CompressionParameter for more on options. + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See + the function train_dict for how to train a ZstdDict on sample data. + + For incremental compression, use a ZstdCompressor instead. 
+ """ + comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) + return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME) + + +def decompress(data, zstd_dict=None, options=None): + """Decompress one or more frames of Zstandard compressed *data*. + + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See + the function train_dict for how to train a ZstdDict on sample data. + *options* is a dict object that contains advanced compression + parameters. See DecompressionParameter for more on options. + + For incremental decompression, use a ZstdDecompressor instead. + """ + results = [] + while True: + decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict) + results.append(decomp.decompress(data)) + if not decomp.eof: + raise ZstdError('Compressed data ended before the ' + 'end-of-stream marker was reached') + data = decomp.unused_data + if not data: + break + return b''.join(results) + + +class CompressionParameter(enum.IntEnum): + """Compression parameters.""" + + compression_level = _zstd.ZSTD_c_compressionLevel + window_log = _zstd.ZSTD_c_windowLog + hash_log = _zstd.ZSTD_c_hashLog + chain_log = _zstd.ZSTD_c_chainLog + search_log = _zstd.ZSTD_c_searchLog + min_match = _zstd.ZSTD_c_minMatch + target_length = _zstd.ZSTD_c_targetLength + strategy = _zstd.ZSTD_c_strategy + + enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching + ldm_hash_log = _zstd.ZSTD_c_ldmHashLog + ldm_min_match = _zstd.ZSTD_c_ldmMinMatch + ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog + ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog + + content_size_flag = _zstd.ZSTD_c_contentSizeFlag + checksum_flag = _zstd.ZSTD_c_checksumFlag + dict_id_flag = _zstd.ZSTD_c_dictIDFlag + + nb_workers = _zstd.ZSTD_c_nbWorkers + job_size = _zstd.ZSTD_c_jobSize + overlap_log = _zstd.ZSTD_c_overlapLog + + def bounds(self): + """Return the (lower, upper) int bounds of a compression parameter. + + Both the lower and upper bounds are inclusive. 
+ """ + return _zstd.get_param_bounds(self.value, is_compress=True) + + +class DecompressionParameter(enum.IntEnum): + """Decompression parameters.""" + + window_log_max = _zstd.ZSTD_d_windowLogMax + + def bounds(self): + """Return the (lower, upper) int bounds of a decompression parameter. + + Both the lower and upper bounds are inclusive. + """ + return _zstd.get_param_bounds(self.value, is_compress=False) + + +class Strategy(enum.IntEnum): + """Compression strategies, listed from fastest to strongest. + + Note that new strategies might be added in the future. + Only the order (from fast to strong) is guaranteed, + the numeric value might change. + """ + + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 + + +# Check validity of the CompressionParameter & DecompressionParameter types +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py new file mode 100644 index 00000000000..8770e576f50 --- /dev/null +++ b/Lib/compression/zstd/_zstdfile.py @@ -0,0 +1,346 @@ +import io +from os import PathLike +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdError, + ZSTD_DStreamOutSize) +from compression._common import _streams + +__all__ = ('ZstdFile', 'open') + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_WRITE = 2 + + +def _nbytes(dat, /): + if isinstance(dat, (bytes, bytearray)): + return len(dat) + with memoryview(dat) as mv: + return mv.nbytes + + +class ZstdFile(_streams.BaseStream): + """A file-like object providing transparent Zstandard (de)compression. + + A ZstdFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + ZstdFile provides a *binary* file interface. 
Data is read and returned as + bytes, and may only be written to objects that support the Buffer Protocol. + """ + + FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK + FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME + + def __init__(self, file, /, mode='r', *, + level=None, options=None, zstd_dict=None): + """Open a Zstandard compressed file in binary mode. + + *file* can be either an file-like object, or a file name to open. + + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for + creating exclusively, or 'a' for appending. These can equivalently be + given as 'rb', 'wb', 'xb' and 'ab' respectively. + + *level* is an optional int specifying the compression level to use, + or COMPRESSION_LEVEL_DEFAULT if not given. + + *options* is an optional dict for advanced compression parameters. + See CompressionParameter and DecompressionParameter for the possible + options. + + *zstd_dict* is an optional ZstdDict object, a pre-trained Zstandard + dictionary. See train_dict() to train ZstdDict on sample data. 
+ """ + self._fp = None + self._close_fp = False + self._mode = _MODE_CLOSED + self._buffer = None + + if not isinstance(mode, str): + raise ValueError('mode must be a str') + if options is not None and not isinstance(options, dict): + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': + if level is not None: + raise TypeError('level is illegal in read mode') + self._mode = _MODE_READ + elif mode in {'w', 'a', 'x'}: + if level is not None and not isinstance(level, int): + raise TypeError('level must be int or None') + self._mode = _MODE_WRITE + self._compressor = ZstdCompressor(level=level, options=options, + zstd_dict=zstd_dict) + self._pos = 0 + else: + raise ValueError(f'Invalid mode: {mode!r}') + + if isinstance(file, (str, bytes, PathLike)): + self._fp = io.open(file, f'{mode}b') + self._close_fp = True + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): + self._fp = file + else: + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') + + if self._mode == _MODE_READ: + raw = _streams.DecompressReader( + self._fp, + ZstdDecompressor, + zstd_dict=zstd_dict, + options=options, + ) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called multiple times. Once the file has been closed, + any other operation on it will raise ValueError. + """ + if self._fp is None: + return + try: + if self._mode == _MODE_READ: + if getattr(self, '_buffer', None): + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self.flush(self.FLUSH_FRAME) + self._compressor = None + finally: + self._mode = _MODE_CLOSED + try: + if self._close_fp: + self._fp.close() + finally: + self._fp = None + self._close_fp = False + + def write(self, data, /): + """Write a bytes-like object *data* to the file. 
+ + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until .flush() + or .close() is called. + """ + self._check_can_write() + + length = _nbytes(data) + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def flush(self, mode=FLUSH_BLOCK): + """Flush remaining data to the underlying stream. + + The mode argument can be FLUSH_BLOCK or FLUSH_FRAME. Abuse of this + method will reduce compression ratio, use it only when necessary. + + If the program is interrupted afterwards, all data can be recovered. + To ensure saving to disk, also need to use os.fsync(fd). + + This method does nothing in reading mode. + """ + if self._mode == _MODE_READ: + return + self._check_not_closed() + if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') + if self._compressor.last_mode == mode: + return + # Flush zstd block/frame, and write. + data = self._compressor.flush(mode) + self._fp.write(data) + if hasattr(self._fp, 'flush'): + self._fp.flush() + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. + """ + if size is None: + size = -1 + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. + """ + self._check_can_read() + if size < 0: + # Note this should *not* be io.DEFAULT_BUFFER_SIZE. + # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing + # a full block is read. 
+ size = ZSTD_DStreamOutSize + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto(b) + + def readinto1(self, b): + """Read bytes into b, while trying to avoid making multiple reads + from the underlying stream. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto1(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the arguments, + this operation may be extremely slow. + """ + self._check_can_read() + + # BufferedReader.seek() checks seekable + return self._buffer.seek(offset, whence) + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. 
+ """ + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + self._check_can_read() + return self._buffer.peek(size) + + def __next__(self): + if ret := self._buffer.readline(): + return ret + raise StopIteration + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + elif self._mode == _MODE_WRITE: + return self._pos + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, + encoding=None, errors=None, newline=None): + """Open a Zstandard compressed file in binary or text mode. + + file can be either a file name (given as a str, bytes, or PathLike object), + in which case the named file is opened, or it can be an existing file object + to read from or write to. + + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a', + 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. + + The level, options, and zstd_dict parameters specify the settings the same + as ZstdFile. 
+ + When using read mode (decompression), the options parameter is a dict + representing advanced decompression options. The level parameter is not + supported in this case. When using write mode (compression), only one of + level, an int representing the compression level, or options, a dict + representing advanced compression options, may be passed. In both modes, + zstd_dict is a ZstdDict instance containing a trained Zstandard dictionary. + + For binary mode, this function is equivalent to the ZstdFile constructor: + ZstdFile(filename, mode, ...). In this case, the encoding, errors and + newline parameters must not be provided. + + For text mode, an ZstdFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + """ + + text_mode = 't' in mode + mode = mode.replace('t', '') + + if text_mode: + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') + else: + if encoding is not None: + raise ValueError('Argument "encoding" not supported in binary mode') + if errors is not None: + raise ValueError('Argument "errors" not supported in binary mode') + if newline is not None: + raise ValueError('Argument "newline" not supported in binary mode') + + binary_file = ZstdFile(file, mode, level=level, options=options, + zstd_dict=zstd_dict) + + if text_mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py index d98b1ebdd58..f506ce68aea 100644 --- a/Lib/concurrent/futures/_base.py +++ b/Lib/concurrent/futures/_base.py @@ -558,6 +558,33 @@ def set_exception(self, exception): self._condition.notify_all() self._invoke_callbacks() + def _get_snapshot(self): + """Get a snapshot of the future's current state. + + This method atomically retrieves the state in one lock acquisition, + which is significantly faster than multiple method calls. 
+ + Returns: + Tuple of (done, cancelled, result, exception) + - done: True if the future is done (cancelled or finished) + - cancelled: True if the future was cancelled + - result: The result if available and not cancelled + - exception: The exception if available and not cancelled + """ + # Fast path: check if already finished without lock + if self._state == FINISHED: + return True, False, self._result, self._exception + + # Need lock for other states since they can change + with self._condition: + # We have to check the state again after acquiring the lock + # because it may have changed in the meantime. + if self._state == FINISHED: + return True, False, self._result, self._exception + if self._state in {CANCELLED, CANCELLED_AND_NOTIFIED}: + return True, True, None, None + return False, False, None, None + __class_getitem__ = classmethod(types.GenericAlias) class Executor(object): diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index d17688dc9d7..a2c4fbfd3fb 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -36,9 +36,6 @@ def __str__(self): """.strip()) -UNBOUND = 2 # error; this should not happen. - - class WorkerContext(_thread.WorkerContext): @classmethod @@ -47,23 +44,13 @@ def resolve_task(fn, args, kwargs): if isinstance(fn, str): # XXX Circle back to this later. raise TypeError('scripts not supported') - if args or kwargs: - raise ValueError(f'a script does not take args or kwargs, got {args!r} and {kwargs!r}') - data = textwrap.dedent(fn) - kind = 'script' - # Make sure the script compiles. - # Ideally we wouldn't throw away the resulting code - # object. However, there isn't much to be done until - # code objects are shareable and/or we do a better job - # of supporting code objects in _interpreters.exec(). - compile(data, '', 'exec') else: # Functions defined in the __main__ module can't be pickled, # so they can't be used here. 
In the future, we could possibly # borrow from multiprocessing to work around this. - data = pickle.dumps((fn, args, kwargs)) - kind = 'function' - return (data, kind) + task = (fn, args, kwargs) + data = pickle.dumps(task) + return data if initializer is not None: try: @@ -86,24 +73,20 @@ def _capture_exc(cls, resultsid): except BaseException as exc: # Send the captured exception out on the results queue, # but still leave it unhandled for the interpreter to handle. - err = pickle.dumps(exc) - _interpqueues.put(resultsid, (None, err), 1, UNBOUND) + _interpqueues.put(resultsid, (None, exc)) raise # re-raise @classmethod def _send_script_result(cls, resultsid): - _interpqueues.put(resultsid, (None, None), 0, UNBOUND) + _interpqueues.put(resultsid, (None, None)) @classmethod def _call(cls, func, args, kwargs, resultsid): with cls._capture_exc(resultsid): res = func(*args or (), **kwargs or {}) # Send the result back. - try: - _interpqueues.put(resultsid, (res, None), 0, UNBOUND) - except _interpreters.NotShareableError: - res = pickle.dumps(res) - _interpqueues.put(resultsid, (res, None), 1, UNBOUND) + with cls._capture_exc(resultsid): + _interpqueues.put(resultsid, (res, None)) @classmethod def _call_pickled(cls, pickled, resultsid): @@ -134,8 +117,7 @@ def initialize(self): _interpreters.incref(self.interpid) maxsize = 0 - fmt = 0 - self.resultsid = _interpqueues.create(maxsize, fmt, UNBOUND) + self.resultsid = _interpqueues.create(maxsize) self._exec(f'from {__name__} import WorkerContext') @@ -166,17 +148,8 @@ def finalize(self): pass def run(self, task): - data, kind = task - if kind == 'script': - raise NotImplementedError('script kind disabled') - script = f""" -with WorkerContext._capture_exc({self.resultsid}): -{textwrap.indent(data, ' ')} -WorkerContext._send_script_result({self.resultsid})""" - elif kind == 'function': - script = f'WorkerContext._call_pickled({data!r}, {self.resultsid})' - else: - raise NotImplementedError(kind) + data = task + script = 
f'WorkerContext._call_pickled({data!r}, {self.resultsid})' try: self._exec(script) @@ -199,15 +172,13 @@ def run(self, task): continue else: break - (res, excdata), pickled, unboundop = obj + (res, exc), unboundop = obj assert unboundop is None, unboundop - if excdata is not None: + if exc is not None: assert res is None, res - assert pickled assert exc_wrapper is not None - exc = pickle.loads(excdata) raise exc from exc_wrapper - return pickle.loads(res) if pickled else res + return res class BrokenInterpreterPool(_thread.BrokenThreadPool): diff --git a/Lib/configparser.py b/Lib/configparser.py index 70cc651edab..239fda60a02 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -541,6 +541,8 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, except (KeyError, NoSectionError, NoOptionError): raise InterpolationMissingOptionError( option, section, rawval, ":".join(path)) from None + if v is None: + continue if "$" in v: self._interpolate_some(parser, opt, accum, v, sect, dict(parser.items(sect, raw=True)), diff --git a/Lib/copyreg.py b/Lib/copyreg.py index 17c5dde67c8..a5e8add4a55 100644 --- a/Lib/copyreg.py +++ b/Lib/copyreg.py @@ -31,8 +31,8 @@ def pickle_complex(c): pickle(complex, pickle_complex, complex) def pickle_union(obj): - import functools, operator - return functools.reduce, (operator.or_, obj.__args__) + import typing, operator + return operator.getitem, (typing.Union, obj.__args__) pickle(type(int | str), pickle_union) diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index bba08b99b95..823a3692fd1 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -209,13 +209,13 @@ class c_longdouble(_SimpleCData): try: class c_double_complex(_SimpleCData): - _type_ = "C" + _type_ = "D" _check_size(c_double_complex) class c_float_complex(_SimpleCData): - _type_ = "E" + _type_ = "F" _check_size(c_float_complex) class c_longdouble_complex(_SimpleCData): - _type_ = "F" + _type_ = "G" except AttributeError: pass 
@@ -266,7 +266,72 @@ class c_void_p(_SimpleCData): class c_bool(_SimpleCData): _type_ = "?" -from _ctypes import POINTER, pointer, _pointer_type_cache +def POINTER(cls): + """Create and return a new ctypes pointer type. + + Pointer types are cached and reused internally, + so calling this function repeatedly is cheap. + """ + if cls is None: + return c_void_p + try: + return cls.__pointer_type__ + except AttributeError: + pass + if isinstance(cls, str): + # handle old-style incomplete types (see test_ctypes.test_incomplete) + import warnings + warnings._deprecated("ctypes.POINTER with string", remove=(3, 19)) + try: + return _pointer_type_cache_fallback[cls] + except KeyError: + result = type(f'LP_{cls}', (_Pointer,), {}) + _pointer_type_cache_fallback[cls] = result + return result + + # create pointer type and set __pointer_type__ for cls + return type(f'LP_{cls.__name__}', (_Pointer,), {'_type_': cls}) + +def pointer(obj): + """Create a new pointer instance, pointing to 'obj'. + + The returned object is of the type POINTER(type(obj)). Note that if you + just want to pass a pointer to an object to a foreign function call, you + should use byref(obj) which is much faster. 
+ """ + typ = POINTER(type(obj)) + return typ(obj) + +class _PointerTypeCache: + def __setitem__(self, cls, pointer_type): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + cls.__pointer_type__ = pointer_type + except AttributeError: + _pointer_type_cache_fallback[cls] = pointer_type + + def __getitem__(self, cls): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + return cls.__pointer_type__ + except AttributeError: + return _pointer_type_cache_fallback[cls] + + def get(self, cls, default=None): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + return cls.__pointer_type__ + except AttributeError: + return _pointer_type_cache_fallback.get(cls, default) + + def __contains__(self, cls): + return hasattr(cls, '__pointer_type__') + +_pointer_type_cache_fallback = {} +_pointer_type_cache = _PointerTypeCache() class c_wchar_p(_SimpleCData): _type_ = "Z" @@ -277,7 +342,7 @@ class c_wchar(_SimpleCData): _type_ = "u" def _reset_cache(): - _pointer_type_cache.clear() + _pointer_type_cache_fallback.clear() _c_functype_cache.clear() if _os.name == "nt": _win_functype_cache.clear() @@ -285,7 +350,6 @@ def _reset_cache(): POINTER(c_wchar).from_param = c_wchar_p.from_param # _SimpleCData.c_char_p_from_param POINTER(c_char).from_param = c_char_p.from_param - _pointer_type_cache[None] = c_void_p def create_unicode_buffer(init, size=None): """create_unicode_buffer(aString) -> character array @@ -319,13 +383,7 @@ def create_unicode_buffer(init, size=None): def SetPointerType(pointer, cls): import warnings warnings._deprecated("ctypes.SetPointerType", remove=(3, 15)) - if _pointer_type_cache.get(cls, None) is not None: - raise RuntimeError("This type already exists in the cache") - if id(pointer) not in _pointer_type_cache: - raise RuntimeError("What's this???") pointer.set_type(cls) - _pointer_type_cache[cls] = pointer - del 
_pointer_type_cache[id(pointer)] def ARRAY(typ, len): return typ * len diff --git a/Lib/ctypes/_layout.py b/Lib/ctypes/_layout.py index beb3b86414c..2048ccb6a1c 100644 --- a/Lib/ctypes/_layout.py +++ b/Lib/ctypes/_layout.py @@ -5,6 +5,7 @@ """ import sys +import warnings from _ctypes import CField, buffer_info import ctypes @@ -66,9 +67,26 @@ def get_layout(cls, input_fields, is_struct, base): # For clarity, variables that count bits have `bit` in their names. + pack = getattr(cls, '_pack_', None) + layout = getattr(cls, '_layout_', None) if layout is None: - if sys.platform == 'win32' or getattr(cls, '_pack_', None): + if sys.platform == 'win32': + gcc_layout = False + elif pack: + if is_struct: + base_type_name = 'Structure' + else: + base_type_name = 'Union' + warnings._deprecated( + '_pack_ without _layout_', + f"Due to '_pack_', the '{cls.__name__}' {base_type_name} will " + + "use memory layout compatible with MSVC (Windows). " + + "If this is intended, set _layout_ to 'ms'. " + + "The implicit default is deprecated and slated to become " + + "an error in Python {remove}.", + remove=(3, 19), + ) gcc_layout = False else: gcc_layout = True @@ -84,7 +102,7 @@ def get_layout(cls, input_fields, is_struct, base): raise ValueError('_align_ must be a non-negative integer') elif align == 0: # Setting `_align_ = 0` amounts to using the default alignment - align == 1 + align = 1 if base: align = max(ctypes.alignment(base), align) @@ -95,7 +113,6 @@ def get_layout(cls, input_fields, is_struct, base): else: big_endian = sys.byteorder == 'big' - pack = getattr(cls, '_pack_', None) if pack is not None: try: pack = int(pack) diff --git a/Lib/curses/__init__.py b/Lib/curses/__init__.py index 6165fe6c987..605d5fcbec5 100644 --- a/Lib/curses/__init__.py +++ b/Lib/curses/__init__.py @@ -30,9 +30,8 @@ def initscr(): fd=_sys.__stdout__.fileno()) stdscr = _curses.initscr() for key, value in _curses.__dict__.items(): - if key[0:4] == 'ACS_' or key in ('LINES', 'COLS'): + if 
key.startswith('ACS_') or key in ('LINES', 'COLS'): setattr(curses, key, value) - return stdscr # This is a similar wrapper for start_color(), which adds the COLORS and @@ -41,12 +40,9 @@ def initscr(): def start_color(): import _curses, curses - retval = _curses.start_color() - if hasattr(_curses, 'COLORS'): - curses.COLORS = _curses.COLORS - if hasattr(_curses, 'COLOR_PAIRS'): - curses.COLOR_PAIRS = _curses.COLOR_PAIRS - return retval + _curses.start_color() + curses.COLORS = _curses.COLORS + curses.COLOR_PAIRS = _curses.COLOR_PAIRS # Import Python has_key() implementation if _curses doesn't contain has_key() @@ -85,10 +81,11 @@ def wrapper(func, /, *args, **kwds): # Start color, too. Harmless if the terminal doesn't have # color; user can test with has_color() later on. The try/catch # works around a minor bit of over-conscientiousness in the curses - # module -- the error return from C start_color() is ignorable. + # module -- the error return from C start_color() is ignorable, + # unless they are raised by the interpreter due to other issues. try: start_color() - except: + except _curses.error: pass return func(stdscr, *args, **kwds) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 0f7dc9ae6b8..86d29df0639 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -244,6 +244,10 @@ def __repr__(self): property, }) +# Any marker is used in `make_dataclass` to mark unannotated fields as `Any` +# without importing `typing` module. +_ANY_MARKER = object() + class InitVar: __slots__ = ('type', ) @@ -1591,7 +1595,7 @@ class C(Base): for item in fields: if isinstance(item, str): name = item - tp = 'typing.Any' + tp = _ANY_MARKER elif len(item) == 2: name, tp, = item elif len(item) == 3: @@ -1610,15 +1614,49 @@ class C(Base): seen.add(name) annotations[name] = tp + # We initially block the VALUE format, because inside dataclass() we'll + # call get_annotations(), which will try the VALUE format first. 
If we don't + # block, that means we'd always end up eagerly importing typing here, which + # is what we're trying to avoid. + value_blocked = True + + def annotate_method(format): + def get_any(): + match format: + case annotationlib.Format.STRING: + return 'typing.Any' + case annotationlib.Format.FORWARDREF: + typing = sys.modules.get("typing") + if typing is None: + return annotationlib.ForwardRef("Any", module="typing") + else: + return typing.Any + case annotationlib.Format.VALUE: + if value_blocked: + raise NotImplementedError + from typing import Any + return Any + case _: + raise NotImplementedError + annos = { + ann: get_any() if t is _ANY_MARKER else t + for ann, t in annotations.items() + } + if format == annotationlib.Format.STRING: + return annotationlib.annotations_to_string(annos) + else: + return annos + # Update 'ns' with the user-supplied namespace plus our calculated values. def exec_body_callback(ns): ns.update(namespace) ns.update(defaults) - ns['__annotations__'] = annotations # We use `types.new_class()` instead of simply `type()` to allow dynamic creation # of generic dataclasses. cls = types.new_class(cls_name, bases, {}, exec_body_callback) + # For now, set annotations including the _ANY_MARKER. + cls.__annotate__ = annotate_method # For pickling to work, the __module__ variable needs to be set to the frame # where the dataclass is created. @@ -1634,10 +1672,13 @@ def exec_body_callback(ns): cls.__module__ = module # Apply the normal provided decorator. - return decorator(cls, init=init, repr=repr, eq=eq, order=order, - unsafe_hash=unsafe_hash, frozen=frozen, - match_args=match_args, kw_only=kw_only, slots=slots, - weakref_slot=weakref_slot) + cls = decorator(cls, init=init, repr=repr, eq=eq, order=order, + unsafe_hash=unsafe_hash, frozen=frozen, + match_args=match_args, kw_only=kw_only, slots=slots, + weakref_slot=weakref_slot) + # Now that the class is ready, allow the VALUE format. 
+ value_blocked = False + return cls def replace(obj, /, **changes): diff --git a/Lib/difflib.py b/Lib/difflib.py index 4bba9e7ea5c..f1f4e62514a 100644 --- a/Lib/difflib.py +++ b/Lib/difflib.py @@ -1038,11 +1038,9 @@ def _qformat(self, aline, bline, atags, btags): # remaining is that perhaps it was really the case that " volatile" # was inserted after "private". I can live with that . -import re - -def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): +def IS_LINE_JUNK(line, pat=None): r""" - Return True for ignorable line: iff `line` is blank or contains a single '#'. + Return True for ignorable line: if `line` is blank or contains a single '#'. Examples: @@ -1054,6 +1052,11 @@ def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): False """ + if pat is None: + # Default: match '#' or the empty string + return line.strip() in '#' + # Previous versions used the undocumented parameter 'pat' as a + # match function. Retain this behaviour for compatibility. return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): @@ -2027,7 +2030,6 @@ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False, replace('\1',''). 
\ replace('\t',' ') -del re def restore(delta, which): r""" diff --git a/Lib/dis.py b/Lib/dis.py index cb6d077a391..d6d2c1386dd 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1131,7 +1131,7 @@ def dis(self): def main(args=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument('-C', '--show-caches', action='store_true', help='show inline caches') parser.add_argument('-O', '--show-offsets', action='store_true', diff --git a/Lib/doctest.py b/Lib/doctest.py index e02e73ed722..2acb6cb79f3 100644 --- a/Lib/doctest.py +++ b/Lib/doctest.py @@ -2870,7 +2870,7 @@ def get(self): def _test(): import argparse - parser = argparse.ArgumentParser(description="doctest runner") + parser = argparse.ArgumentParser(description="doctest runner", color=True) parser.add_argument('-v', '--verbose', action='store_true', default=False, help='print very verbose output for all tests') parser.add_argument('-o', '--option', action='append', diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index b2bc4afc1cc..9d80a5822af 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -30,7 +30,7 @@ NLCRE = re.compile(r'\r\n|\r|\n') NLCRE_bol = re.compile(r'(\r\n|\r|\n)') -NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z') +NLCRE_eol = re.compile(r'(\r\n|\r|\n)\z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') # RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". 
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py index a94bb270671..4ecb6b6e297 100644 --- a/Lib/encodings/aliases.py +++ b/Lib/encodings/aliases.py @@ -405,6 +405,8 @@ 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', + 'iso_8859_8_i' : 'iso8859_8', + 'iso_8859_8_e' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index 2b10d97ec66..aa641e94a8b 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -10,7 +10,7 @@ __all__ = ["version", "bootstrap"] -_PIP_VERSION = "25.0.1" +_PIP_VERSION = "25.1.1" # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora @@ -205,7 +205,7 @@ def _uninstall_helper(*, verbosity=0): def _main(argv=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument( "--version", action="version", diff --git a/Lib/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl b/Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl similarity index 67% rename from Lib/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl rename to Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl index 8d3b0043ea5..2fdcfbf9ff8 100644 Binary files a/Lib/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl and b/Lib/ensurepip/_bundled/pip-25.1.1-py3-none-any.whl differ diff --git a/Lib/enum.py b/Lib/enum.py index b5f3ca7ae11..01fecca3e5a 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -731,14 +731,16 @@ def __contains__(cls, value): """ if isinstance(value, cls): return True - try: - cls(value) - return True - except ValueError: - return ( - value in cls._unhashable_values_ # both structures are lists - or value in cls._hashable_values_ - ) + if issubclass(cls, Flag): + try: + result = cls._missing_(value) + return isinstance(result, cls) + except ValueError: + pass + return ( + value in 
cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 1dee8330f5d..10e1c936688 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -185,7 +185,7 @@ def _translate(pat, star, question_mark): def _join_translated_parts(parts, star_indices): if not star_indices: - return fr'(?s:{"".join(parts)})\Z' + return fr'(?s:{"".join(parts)})\z' iter_star_indices = iter(star_indices) j = next(iter_star_indices) buffer = parts[:j] # fixed pieces at the start @@ -206,4 +206,4 @@ def _join_translated_parts(parts, star_indices): append('.*') extend(parts[i:]) res = ''.join(buffer) - return fr'(?s:{res})\Z' + return fr'(?s:{res})\z' diff --git a/Lib/fractions.py b/Lib/fractions.py index f0cbc8c2e6c..063f28478c7 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -64,7 +64,7 @@ def _hash_algorithm(numerator, denominator): (?:\.(?P\d*|\d+(_\d+)*))? # an optional fractional part (?:E(?P[-+]?\d+(_\d+)*))? 
# and optional exponent ) - \s*\Z # and optional whitespace to finish + \s*\z # and optional whitespace to finish """, re.VERBOSE | re.IGNORECASE) @@ -238,11 +238,6 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = 1 return self - elif isinstance(numerator, numbers.Rational): - self._numerator = numerator.numerator - self._denominator = numerator.denominator - return self - elif (isinstance(numerator, float) or (not isinstance(numerator, type) and hasattr(numerator, 'as_integer_ratio'))): @@ -278,6 +273,11 @@ def __new__(cls, numerator=0, denominator=None): if m.group('sign') == '-': numerator = -numerator + elif isinstance(numerator, numbers.Rational): + self._numerator = numerator.numerator + self._denominator = numerator.denominator + return self + else: raise TypeError("argument should be a string or a Rational " "instance or have the as_integer_ratio() method") @@ -905,8 +905,10 @@ def __pow__(a, b, modulo=None): else: return NotImplemented - def __rpow__(b, a): + def __rpow__(b, a, modulo=None): """a ** b""" + if modulo is not None: + return NotImplemented if b._denominator == 1 and b._numerator >= 0: # If a is an int, keep it that way if possible. return a ** b._numerator diff --git a/Lib/functools.py b/Lib/functools.py index 714070c6ac9..7f0eac3f650 100644 --- a/Lib/functools.py +++ b/Lib/functools.py @@ -323,6 +323,9 @@ def _partial_new(cls, func, /, *args, **keywords): "or a descriptor") if args and args[-1] is Placeholder: raise TypeError("trailing Placeholders are not allowed") + for value in keywords.values(): + if value is Placeholder: + raise TypeError("Placeholder cannot be passed as a keyword argument") if isinstance(func, base_cls): pto_phcount = func._phcount tot_args = func.args diff --git a/Lib/getpass.py b/Lib/getpass.py index bd0097ced94..1dd40e25e09 100644 --- a/Lib/getpass.py +++ b/Lib/getpass.py @@ -1,6 +1,7 @@ """Utilities to get a password and/or the current user name. 
-getpass(prompt[, stream]) - Prompt for a password, with echo turned off. +getpass(prompt[, stream[, echo_char]]) - Prompt for a password, with echo +turned off and optional keyboard feedback. getuser() - Get the user name from the environment or password database. GetPassWarning - This UserWarning is issued when getpass() cannot prevent @@ -25,13 +26,15 @@ class GetPassWarning(UserWarning): pass -def unix_getpass(prompt='Password: ', stream=None): +def unix_getpass(prompt='Password: ', stream=None, *, echo_char=None): """Prompt for a password, with echo turned off. Args: prompt: Written on stream to ask for the input. Default: 'Password: ' stream: A writable file object to display the prompt. Defaults to the tty. If no tty is available defaults to sys.stderr. + echo_char: A string used to mask input (e.g., '*'). If None, input is + hidden. Returns: The seKr3t input. Raises: @@ -40,6 +43,8 @@ def unix_getpass(prompt='Password: ', stream=None): Always restores terminal settings before returning. 
""" + _check_echo_char(echo_char) + passwd = None with contextlib.ExitStack() as stack: try: @@ -68,12 +73,16 @@ def unix_getpass(prompt='Password: ', stream=None): old = termios.tcgetattr(fd) # a copy to save new = old[:] new[3] &= ~termios.ECHO # 3 == 'lflags' + if echo_char: + new[3] &= ~termios.ICANON tcsetattr_flags = termios.TCSAFLUSH if hasattr(termios, 'TCSASOFT'): tcsetattr_flags |= termios.TCSASOFT try: termios.tcsetattr(fd, tcsetattr_flags, new) - passwd = _raw_input(prompt, stream, input=input) + passwd = _raw_input(prompt, stream, input=input, + echo_char=echo_char) + finally: termios.tcsetattr(fd, tcsetattr_flags, old) stream.flush() # issue7208 @@ -93,10 +102,11 @@ def unix_getpass(prompt='Password: ', stream=None): return passwd -def win_getpass(prompt='Password: ', stream=None): +def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): """Prompt for password with echo off, using Windows getwch().""" if sys.stdin is not sys.__stdin__: return fallback_getpass(prompt, stream) + _check_echo_char(echo_char) for c in prompt: msvcrt.putwch(c) @@ -108,25 +118,39 @@ def win_getpass(prompt='Password: ', stream=None): if c == '\003': raise KeyboardInterrupt if c == '\b': + if echo_char and pw: + msvcrt.putwch('\b') + msvcrt.putwch(' ') + msvcrt.putwch('\b') pw = pw[:-1] else: pw = pw + c + if echo_char: + msvcrt.putwch(echo_char) msvcrt.putwch('\r') msvcrt.putwch('\n') return pw -def fallback_getpass(prompt='Password: ', stream=None): +def fallback_getpass(prompt='Password: ', stream=None, *, echo_char=None): + _check_echo_char(echo_char) import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: stream = sys.stderr print("Warning: Password input may be echoed.", file=stream) - return _raw_input(prompt, stream) + return _raw_input(prompt, stream, echo_char=echo_char) -def _raw_input(prompt="", stream=None, input=None): +def _check_echo_char(echo_char): + # ASCII excluding control characters 
+ if echo_char and not (echo_char.isprintable() and echo_char.isascii()): + raise ValueError("'echo_char' must be a printable ASCII string, " + f"got: {echo_char!r}") + + +def _raw_input(prompt="", stream=None, input=None, echo_char=None): # This doesn't save the string in the GNU readline history. if not stream: stream = sys.stderr @@ -143,6 +167,8 @@ def _raw_input(prompt="", stream=None, input=None): stream.write(prompt) stream.flush() # NOTE: The Python C API calls flockfile() (and unlock) during readline. + if echo_char: + return _readline_with_echo_char(stream, input, echo_char) line = input.readline() if not line: raise EOFError @@ -151,6 +177,35 @@ def _raw_input(prompt="", stream=None, input=None): return line +def _readline_with_echo_char(stream, input, echo_char): + passwd = "" + eof_pressed = False + while True: + char = input.read(1) + if char == '\n' or char == '\r': + break + elif char == '\x03': + raise KeyboardInterrupt + elif char == '\x7f' or char == '\b': + if passwd: + stream.write("\b \b") + stream.flush() + passwd = passwd[:-1] + elif char == '\x04': + if eof_pressed: + break + else: + eof_pressed = True + elif char == '\x00': + continue + else: + passwd += char + stream.write(echo_char) + stream.flush() + eof_pressed = False + return passwd + + def getuser(): """Get the username from the environment or password database. 
diff --git a/Lib/glob.py b/Lib/glob.py index 8879eff8041..1e48fe43167 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -316,7 +316,7 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): if idx < last_part_idx: results.append(any_sep) res = ''.join(results) - return fr'(?s:{res})\Z' + return fr'(?s:{res})\z' @functools.lru_cache(maxsize=512) @@ -358,6 +358,12 @@ def concat_path(path, text): """ raise NotImplementedError + @staticmethod + def stringify_path(path): + """Converts the path to a string object + """ + raise NotImplementedError + # High-level methods def compile(self, pat, altsep=None): @@ -466,8 +472,9 @@ def recursive_selector(self, part, parts): select_next = self.selector(parts) def select_recursive(path, exists=False): - match_pos = len(str(path)) - if match is None or match(str(path), match_pos): + path_str = self.stringify_path(path) + match_pos = len(path_str) + if match is None or match(path_str, match_pos): yield from select_next(path, exists) stack = [path] while stack: @@ -489,7 +496,7 @@ def select_recursive_step(stack, match_pos): pass if is_dir or not dir_only: - entry_path_str = str(entry_path) + entry_path_str = self.stringify_path(entry_path) if dir_only: entry_path = self.concat_path(entry_path, self.sep) if match is None or match(entry_path_str, match_pos): @@ -529,19 +536,6 @@ def scandir(path): entries = list(scandir_it) return ((entry, entry.name, entry.path) for entry in entries) - -class _PathGlobber(_GlobberBase): - """Provides shell-style pattern matching and globbing for pathlib paths. - """ - @staticmethod - def lexists(path): - return path.info.exists(follow_symlinks=False) - - @staticmethod - def scandir(path): - return ((child.info, child.name, child) for child in path.iterdir()) - - @staticmethod - def concat_path(path, text): - return path.with_segments(str(path) + text) + def stringify_path(path): + return path # Already a string. 
diff --git a/Lib/gzip.py b/Lib/gzip.py index 2a6eea1b393..c00f51858de 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -5,7 +5,6 @@ # based on Andrew Kuchling's minigzip.py distributed with the zlib module -import _compression import builtins import io import os @@ -14,6 +13,7 @@ import time import weakref import zlib +from compression._common import _streams __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] @@ -144,7 +144,7 @@ def writable(self): return True -class GzipFile(_compression.BaseStream): +class GzipFile(_streams.BaseStream): """The GzipFile class simulates most of the methods of a file object with the exception of the truncate() method. @@ -523,7 +523,7 @@ def _read_gzip_header(fp): return last_mtime -class _GzipReader(_compression.DecompressReader): +class _GzipReader(_streams.DecompressReader): def __init__(self, fp): super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor, wbits=-zlib.MAX_WBITS) @@ -667,7 +667,9 @@ def main(): from argparse import ArgumentParser parser = ArgumentParser(description= "A simple command line interface for the gzip module: act like gzip, " - "but do not delete the input file.") + "but do not delete the input file.", + color=True, + ) group = parser.add_mutually_exclusive_group() group.add_argument('--fast', action='store_true', help='compress faster') group.add_argument('--best', action='store_true', help='compress better') diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 1b2c30cc32f..abacac22ea0 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -231,6 +231,8 @@ def file_digest(fileobj, digest, /, *, _bufsize=2**18): view = memoryview(buf) while True: size = fileobj.readinto(buf) + if size is None: + raise BlockingIOError("I/O operation would block.") if size == 0: break # EOF digestobj.update(view[:size]) diff --git a/Lib/heapq.py b/Lib/heapq.py index 9649da251f2..6ceb211f1ca 100644 --- a/Lib/heapq.py +++ b/Lib/heapq.py @@ -178,7 +178,7 @@ def heapify(x): for i in reversed(range(n//2)): 
_siftup(x, i) -def _heappop_max(heap): +def heappop_max(heap): """Maxheap version of a heappop.""" lastelt = heap.pop() # raises appropriate IndexError if heap is empty if heap: @@ -188,19 +188,32 @@ def _heappop_max(heap): return returnitem return lastelt -def _heapreplace_max(heap, item): +def heapreplace_max(heap, item): """Maxheap version of a heappop followed by a heappush.""" returnitem = heap[0] # raises appropriate IndexError if heap is empty heap[0] = item _siftup_max(heap, 0) return returnitem -def _heapify_max(x): +def heappush_max(heap, item): + """Maxheap version of a heappush.""" + heap.append(item) + _siftdown_max(heap, 0, len(heap)-1) + +def heappushpop_max(heap, item): + """Maxheap fast version of a heappush followed by a heappop.""" + if heap and item < heap[0]: + item, heap[0] = heap[0], item + _siftup_max(heap, 0) + return item + +def heapify_max(x): """Transform list into a maxheap, in-place, in O(len(x)) time.""" n = len(x) for i in reversed(range(n//2)): _siftup_max(x, i) + # 'heap' is a heap at all indices >= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant. 
@@ -335,9 +348,9 @@ def merge(*iterables, key=None, reverse=False): h_append = h.append if reverse: - _heapify = _heapify_max - _heappop = _heappop_max - _heapreplace = _heapreplace_max + _heapify = heapify_max + _heappop = heappop_max + _heapreplace = heapreplace_max direction = -1 else: _heapify = heapify @@ -490,10 +503,10 @@ def nsmallest(n, iterable, key=None): result = [(elem, i) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: if elem < top: _heapreplace(result, (elem, order)) @@ -507,10 +520,10 @@ def nsmallest(n, iterable, key=None): result = [(key(elem), i, elem) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: k = key(elem) if k < top: @@ -583,19 +596,13 @@ def nlargest(n, iterable, key=None): from _heapq import * except ImportError: pass -try: - from _heapq import _heapreplace_max -except ImportError: - pass -try: - from _heapq import _heapify_max -except ImportError: - pass -try: - from _heapq import _heappop_max -except ImportError: - pass +# For backwards compatibility +_heappop_max = heappop_max +_heapreplace_max = heapreplace_max +_heappush_max = heappush_max +_heappushpop_max = heappushpop_max +_heapify_max = heapify_max if __name__ == "__main__": diff --git a/Lib/hmac.py b/Lib/hmac.py index 2af11c26947..3683a4aa653 100644 --- a/Lib/hmac.py +++ b/Lib/hmac.py @@ -81,13 +81,13 @@ def __init(self, key, msg, digestmod): try: self._init_openssl_hmac(key, msg, digestmod) return - except _hashopenssl.UnsupportedDigestmodError: + except _hashopenssl.UnsupportedDigestmodError: # pragma: no cover pass if _hmac and isinstance(digestmod, str): try: self._init_builtin_hmac(key, msg, digestmod) return - except 
_hmac.UnknownHashError: + except _hmac.UnknownHashError: # pragma: no cover pass self._init_old(key, msg, digestmod) @@ -121,12 +121,12 @@ def _init_old(self, key, msg, digestmod): warnings.warn(f"block_size of {blocksize} seems too small; " f"using our default of {self.blocksize}.", RuntimeWarning, 2) - blocksize = self.blocksize + blocksize = self.blocksize # pragma: no cover else: warnings.warn("No block_size attribute on given digest object; " f"Assuming {self.blocksize}.", RuntimeWarning, 2) - blocksize = self.blocksize + blocksize = self.blocksize # pragma: no cover if len(key) > blocksize: key = digest_cons(key).digest() diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 13c95c34e50..1e30956fe24 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -12,6 +12,7 @@ import _markupbase from html import unescape +from html.entities import html5 as html5_entities __all__ = ['HTMLParser'] @@ -23,6 +24,7 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') piclose = re.compile('>') @@ -57,6 +59,22 @@ # ') +# Character reference processing logic specific to attribute values +# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state +def _replace_attr_charref(match): + ref = match.group(0) + # Numeric / hex char refs must always be unescaped + if ref.startswith('&#'): + return unescape(ref) + # Named character / entity references must only be unescaped + # if they are an exact match, and they are not followed by an equals sign + if not ref.endswith('=') and ref[1:] in html5_entities: + return unescape(ref) + # Otherwise do not unescape + return ref + +def _unescape_attrvalue(s): + return attr_charref.sub(_replace_attr_charref, s) class HTMLParser(_markupbase.ParserBase): @@ -242,7 +260,7 @@ def goahead(self, end): else: 
assert 0, "interesting.search() lied" # end while - if end and i < n and not self.cdata_elem: + if end and i < n: if self.convert_charrefs and not self.cdata_elem: self.handle_data(unescape(rawdata[i:n])) else: @@ -260,7 +278,7 @@ def parse_html_declaration(self, i): if rawdata[i:i+4] == ' ¬-an-entity-ref; +

    + ''""" + s = f'