[3.12] Convert change detection to a Python script (GH-129627) (#130370)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) <wk.cvs.github@sydorenko.org.ua>
2025-12-31 04:23:37 +00:00 · 2025-02-21 17:18:44 +02:00 · 2025-02-21 17:18:44 +02:00 · 91e5e246b3
commit 91e5e246b3
parent dd5bf313c6
3 changed files with 321 additions and 44 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@ -22,32 +22,32 @@ env:
  FORCE_COLOR: 1

 jobs:
-  check_source:
+  build-context:
    name: Change detection
    # To use boolean outputs from this job, parse them as JSON.
    # Here are some examples:
    #
-    #   if: fromJSON(needs.check_source.outputs.run-docs)
+    #   if: fromJSON(needs.build-context.outputs.run-docs)
    #
    #   ${{
-    #        fromJSON(needs.check_source.outputs.run_tests)
+    #        fromJSON(needs.build-context.outputs.run-tests)
    #        && 'truthy-branch'
    #        || 'falsy-branch'
    #   }}
    #
-    uses: ./.github/workflows/reusable-change-detection.yml
+    uses: ./.github/workflows/reusable-context.yml

  check-docs:
    name: Docs
-    needs: check_source
-    if: fromJSON(needs.check_source.outputs.run-docs)
+    needs: build-context
+    if: fromJSON(needs.build-context.outputs.run-docs)
    uses: ./.github/workflows/reusable-docs.yml

  check_abi:
    name: 'Check if the ABI has changed'
    runs-on: ubuntu-22.04
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    steps:
      - uses: actions/checkout@v4
        with:
@ -96,8 +96,8 @@ jobs:
    container:
      image: ghcr.io/python/autoconf:2024.10.16.11360930377
    timeout-minutes: 60
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    steps:
      - name: Install Git
        run: |
@ -137,8 +137,8 @@ jobs:
    # reproducible: to get the same tools versions (autoconf, aclocal, ...)
    runs-on: ubuntu-24.04
    timeout-minutes: 60
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    steps:
      - uses: actions/checkout@v4
        with:
@ -153,7 +153,7 @@ jobs:
        with:
          path: config.cache
          # Include env.pythonLocation in key to avoid changes in environment when setup-python updates Python
-          key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.check_source.outputs.config_hash }}-${{ env.pythonLocation }}
+          key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}-${{ env.pythonLocation }}
      - name: Install dependencies
        run: sudo ./.github/workflows/posix-deps-apt.sh
      - name: Add ccache to PATH
@ -199,8 +199,8 @@ jobs:
    name: >-
      Windows
      ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }}
-    needs: check_source
-    if: fromJSON(needs.check_source.outputs.run_tests)
+    needs: build-context
+    if: fromJSON(needs.build-context.outputs.run-tests)
    strategy:
      matrix:
        arch:
@ -218,8 +218,8 @@ jobs:
  build_windows_msi:
    name: >-  # ${{ '' }} is a hack to nest jobs under the same sidebar category
      Windows MSI${{ '' }}
-    needs: check_source
-    if: fromJSON(needs.check_source.outputs.run-win-msi)
+    needs: build-context
+    if: fromJSON(needs.build-context.outputs.run-windows-msi)
    strategy:
      matrix:
        arch:
@ -234,8 +234,8 @@ jobs:
    name: >-
      macOS
      ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }}
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    strategy:
      fail-fast: false
      matrix:
@ -260,7 +260,7 @@ jobs:
          free-threading: true
    uses: ./.github/workflows/reusable-macos.yml
    with:
-      config_hash: ${{ needs.check_source.outputs.config_hash }}
+      config_hash: ${{ needs.build-context.outputs.config-hash }}
      free-threading: ${{ matrix.free-threading }}
      os: ${{ matrix.os }}

@ -268,8 +268,8 @@ jobs:
    name: >-
      Ubuntu
      ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }}
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    strategy:
      matrix:
        free-threading:
@ -277,15 +277,15 @@ jobs:
        # - true
    uses: ./.github/workflows/reusable-ubuntu.yml
    with:
-      config_hash: ${{ needs.check_source.outputs.config_hash }}
+      config_hash: ${{ needs.build-context.outputs.config-hash }}
      free-threading: ${{ matrix.free-threading }}

  build_ubuntu_ssltests:
    name: 'Ubuntu SSL tests with OpenSSL'
    runs-on: ${{ matrix.os }}
    timeout-minutes: 60
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    strategy:
      fail-fast: false
      matrix:
@ -306,7 +306,7 @@ jobs:
      uses: actions/cache@v4
      with:
        path: config.cache
-        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.check_source.outputs.config_hash }}
+        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}
    - name: Register gcc problem matcher
      run: echo "::add-matcher::.github/problem-matchers/gcc.json"
    - name: Install dependencies
@ -345,8 +345,8 @@ jobs:
    name: "Hypothesis tests on Ubuntu"
    runs-on: ubuntu-24.04
    timeout-minutes: 60
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true' && needs.check_source.outputs.run_hypothesis == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    env:
      OPENSSL_VER: 3.0.15
      PYTHONSTRICTEXTENSIONBUILD: 1
@ -393,7 +393,7 @@ jobs:
      uses: actions/cache@v4
      with:
        path: ${{ env.CPYTHON_BUILDDIR }}/config.cache
-        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.check_source.outputs.config_hash }}
+        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}
    - name: Configure CPython out-of-tree
      working-directory: ${{ env.CPYTHON_BUILDDIR }}
      run: |
@ -460,8 +460,8 @@ jobs:
    name: 'Address sanitizer'
    runs-on: ubuntu-22.04
    timeout-minutes: 60
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    strategy:
      matrix:
        os: [ubuntu-24.04]
@ -479,7 +479,7 @@ jobs:
      uses: actions/cache@v4
      with:
        path: config.cache
-        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.check_source.outputs.config_hash }}
+        key: ${{ github.job }}-${{ env.IMAGE_OS_VERSION }}-${{ needs.build-context.outputs.config-hash }}
    - name: Register gcc problem matcher
      run: echo "::add-matcher::.github/problem-matchers/gcc.json"
    - name: Install dependencies
@ -523,15 +523,15 @@ jobs:
    name: >-
      Thread sanitizer
      ${{ fromJSON(matrix.free-threading) && '(free-threading)' || '' }}
-    needs: check_source
-    if: needs.check_source.outputs.run_tests == 'true'
+    needs: build-context
+    if: needs.build-context.outputs.run-tests == 'true'
    strategy:
      matrix:
        free-threading:
        - false
    uses: ./.github/workflows/reusable-tsan.yml
    with:
-      config_hash: ${{ needs.check_source.outputs.config_hash }}
+      config_hash: ${{ needs.build-context.outputs.config-hash }}
      free-threading: ${{ matrix.free-threading }}

  all-required-green:  # This job does nothing and is only used for the branch protection
@ -539,7 +539,7 @@ jobs:
    if: always()

    needs:
-    - check_source  # Transitive dependency, needed to access `run_tests` value
+    - build-context  # Transitive dependency, needed to access `run-tests` value
    - check-docs
    - check_autoconf_regen
    - check_generated_files
@ -564,14 +564,14 @@ jobs:
          test_hypothesis,
        allowed-skips: >-
          ${{
-            !fromJSON(needs.check_source.outputs.run-docs)
+            !fromJSON(needs.build-context.outputs.run-docs)
            && '
            check-docs,
            '
            || ''
          }}
          ${{
-            needs.check_source.outputs.run_tests != 'true'
+            needs.build-context.outputs.run-tests != 'true'
            && '
            check_autoconf_regen,
            check_generated_files,
@ -581,12 +581,6 @@ jobs:
            build_windows,
            build_asan,
            build_tsan,
-            '
-            || ''
-          }}
-          ${{
-            !fromJSON(needs.check_source.outputs.run_hypothesis)
-            && '
            test_hypothesis,
            '
            || ''
--- a/.github/workflows/reusable-context.yml
+++ b/.github/workflows/reusable-context.yml
@ -0,0 +1,100 @@
+name: Reusable build context
+
+on:  # yamllint disable-line rule:truthy
+  workflow_call:
+    outputs:
+      # Every referenced step MUST always set its output variable,
+      # either via ``Tools/build/compute-changes.py`` or in this workflow file.
+      # Boolean outputs (generally prefixed ``run-``) can then later be used
+      # safely through the following idiom in job conditionals and other
+      # expressions. Here are some examples:
+      #
+      #   if: fromJSON(needs.build-context.outputs.run-tests)
+      #
+      #   ${{
+      #        fromJSON(needs.build-context.outputs.run-tests)
+      #        && 'truthy-branch'
+      #        || 'falsy-branch'
+      #   }}
+      #
+      config-hash:
+        description: Config hash value for use in cache keys
+        value: ${{ jobs.compute-changes.outputs.config-hash }}  # str
+      run-docs:
+        description: Whether to build the docs
+        value: ${{ jobs.compute-changes.outputs.run-docs }}  # bool
+      run-tests:
+        description: Whether to run the regular tests
+        value: ${{ jobs.compute-changes.outputs.run-tests  }}  # bool
+      run-windows-msi:
+        description: Whether to run the MSI installer smoke tests
+        value: ${{ jobs.compute-changes.outputs.run-windows-msi }}  # bool
+      run-ci-fuzz:
+        description: Whether to run the CIFuzz job
+        value: ${{ jobs.compute-changes.outputs.run-ci-fuzz }}  # bool
+
+jobs:
+  compute-changes:
+    name: Create context from changed files
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    outputs:
+      config-hash: ${{ steps.config-hash.outputs.hash }}
+      run-ci-fuzz: ${{ steps.changes.outputs.run-ci-fuzz }}
+      run-docs: ${{ steps.changes.outputs.run-docs }}
+      run-tests: ${{ steps.changes.outputs.run-tests }}
+      run-windows-msi: ${{ steps.changes.outputs.run-windows-msi }}
+    steps:
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: "3"
+
+    - run: >-
+        echo '${{ github.event_name }}'
+
+    - uses: actions/checkout@v4
+      with:
+        persist-credentials: false
+        ref: >-
+          ${{
+            github.event_name == 'pull_request'
+            && github.event.pull_request.head.sha
+            || ''
+          }}
+
+    # Adapted from https://github.com/actions/checkout/issues/520#issuecomment-1167205721
+    - name: Fetch commits to get branch diff
+      if: github.event_name == 'pull_request'
+      run: |
+        set -eux
+
+        # Fetch enough history to find a common ancestor commit (aka merge-base):
+        git fetch origin "${refspec_pr}" --depth=$(( commits + 1 )) \
+          --no-tags --prune --no-recurse-submodules
+
+        # This should get the oldest commit in the local fetched history (which may not be the commit the PR branched from):
+        COMMON_ANCESTOR=$( git rev-list --first-parent --max-parents=0 --max-count=1 "${branch_pr}" )
+        DATE=$( git log --date=iso8601 --format=%cd "${COMMON_ANCESTOR}" )
+
+        # Get all commits since that commit date from the base branch (eg: main):
+        git fetch origin "${refspec_base}" --shallow-since="${DATE}" \
+          --no-tags --prune --no-recurse-submodules
+      env:
+        branch_pr: 'origin/${{ github.event.pull_request.head.ref }}'
+        commits: ${{ github.event.pull_request.commits }}
+        refspec_base: '+${{ github.event.pull_request.base.sha }}:remotes/origin/${{ github.event.pull_request.base.ref }}'
+        refspec_pr: '+${{ github.event.pull_request.head.sha }}:remotes/origin/${{ github.event.pull_request.head.ref }}'
+
+    # We only want to run tests on PRs when related files are changed,
+    # or when someone triggers a manual workflow run.
+    - name: Compute changed files
+      id: changes
+      run: python Tools/build/compute-changes.py
+      env:
+        GITHUB_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+
+    - name: Compute hash for config cache key
+      id: config-hash
+      run: |
+        echo "hash=${{ hashFiles('configure', 'configure.ac', '.github/workflows/build.yml') }}" >> "$GITHUB_OUTPUT"
--- a/Tools/build/compute-changes.py
+++ b/Tools/build/compute-changes.py
@ -0,0 +1,183 @@
+"""Determine which GitHub Actions workflows to run.
+
+Called by ``.github/workflows/reusable-context.yml``.
+We only want to run tests on PRs when related files are changed,
+or when someone triggers a manual workflow run.
+This improves developer experience by not doing (slow)
+unnecessary work in GHA, and saves CI resources.
+"""
+
+from __future__ import annotations
+
+import os
+import subprocess
+from dataclasses import dataclass
+from pathlib import Path
+
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from collections.abc import Set
+
+GITHUB_DEFAULT_BRANCH = os.environ["GITHUB_DEFAULT_BRANCH"]
+GITHUB_CODEOWNERS_PATH = Path(".github/CODEOWNERS")
+GITHUB_WORKFLOWS_PATH = Path(".github/workflows")
+CONFIGURATION_FILE_NAMES = frozenset({
+    ".pre-commit-config.yaml",
+    ".ruff.toml",
+    "mypy.ini",
+})
+SUFFIXES_C_OR_CPP = frozenset({".c", ".h", ".cpp"})
+SUFFIXES_DOCUMENTATION = frozenset({".rst", ".md"})
+
+
+@dataclass(kw_only=True, slots=True)
+class Outputs:
+    run_ci_fuzz: bool = False
+    run_docs: bool = False
+    run_tests: bool = False
+    run_windows_msi: bool = False
+
+
+def compute_changes() -> None:
+    target_branch, head_branch = git_branches()
+    if target_branch and head_branch:
+        # Getting changed files only makes sense on a pull request
+        files = get_changed_files(
+            f"origin/{target_branch}", f"origin/{head_branch}"
+        )
+        outputs = process_changed_files(files)
+    else:
+        # Otherwise, just run the tests
+        outputs = Outputs(run_tests=True)
+    outputs = process_target_branch(outputs, target_branch)
+
+    if outputs.run_tests:
+        print("Run tests")
+
+    if outputs.run_ci_fuzz:
+        print("Run CIFuzz tests")
+    else:
+        print("Branch too old for CIFuzz tests; or no C files were changed")
+
+    if outputs.run_docs:
+        print("Build documentation")
+
+    if outputs.run_windows_msi:
+        print("Build Windows MSI")
+
+    print(outputs)
+
+    write_github_output(outputs)
+
+
+def git_branches() -> tuple[str, str]:
+    target_branch = os.environ.get("GITHUB_BASE_REF", "")
+    target_branch = target_branch.removeprefix("refs/heads/")
+    print(f"target branch: {target_branch!r}")
+
+    head_branch = os.environ.get("GITHUB_HEAD_REF", "")
+    head_branch = head_branch.removeprefix("refs/heads/")
+    print(f"head branch: {head_branch!r}")
+    return target_branch, head_branch
+
+
+def get_changed_files(
+    ref_a: str = GITHUB_DEFAULT_BRANCH, ref_b: str = "HEAD"
+) -> Set[Path]:
+    """List the files changed on ref_b since its merge base with ref_a."""
+    args = ("git", "diff", "--name-only", f"{ref_a}...{ref_b}", "--")
+    print(*args)
+    changed_files_result = subprocess.run(
+        args, stdout=subprocess.PIPE, check=True, encoding="utf-8"
+    )
+    changed_files = changed_files_result.stdout.strip().splitlines()
+    return frozenset(map(Path, filter(None, map(str.strip, changed_files))))
+
+
+def process_changed_files(changed_files: Set[Path]) -> Outputs:
+    run_tests = False
+    run_ci_fuzz = False
+    run_docs = False
+    run_windows_msi = False
+
+    for file in changed_files:
+        # Documentation files
+        doc_or_misc = file.parts[0] in {"Doc", "Misc"}
+        doc_file = file.suffix in SUFFIXES_DOCUMENTATION or doc_or_misc
+
+        if file.parent == GITHUB_WORKFLOWS_PATH:
+            if file.name == "build.yml":
+                run_tests = run_ci_fuzz = True
+            if file.name == "reusable-docs.yml":
+                run_docs = True
+            if file.name == "reusable-windows-msi.yml":
+                run_windows_msi = True
+
+        if not (
+            doc_file
+            or file == GITHUB_CODEOWNERS_PATH
+            or file.name in CONFIGURATION_FILE_NAMES
+        ):
+            run_tests = True
+
+        # The fuzz tests are pretty slow so they are executed only for PRs
+        # changing relevant files.
+        if file.suffix in SUFFIXES_C_OR_CPP:
+            run_ci_fuzz = True
+        if file.parts[:2] in {
+            ("configure",),
+            ("Modules", "_xxtestfuzz"),
+        }:
+            run_ci_fuzz = True
+
+        # Check for changed documentation-related files
+        if doc_file:
+            run_docs = True
+
+        # Check for changed MSI installer-related files
+        if file.parts[:2] == ("Tools", "msi"):
+            run_windows_msi = True
+
+    return Outputs(
+        run_ci_fuzz=run_ci_fuzz,
+        run_docs=run_docs,
+        run_tests=run_tests,
+        run_windows_msi=run_windows_msi,
+    )
+
+
+def process_target_branch(outputs: Outputs, git_branch: str) -> Outputs:
+    if not git_branch:
+        outputs.run_tests = True
+
+    # CIFuzz / OSS-Fuzz compatibility with older branches may be broken.
+    if git_branch != GITHUB_DEFAULT_BRANCH:
+        outputs.run_ci_fuzz = False
+
+    if os.environ.get("GITHUB_EVENT_NAME", "").lower() == "workflow_dispatch":
+        outputs.run_docs = True
+        outputs.run_windows_msi = True
+
+    return outputs
+
+
+def write_github_output(outputs: Outputs) -> None:
+    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/store-information-in-variables#default-environment-variables
+    # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-an-output-parameter
+    if "GITHUB_OUTPUT" not in os.environ:
+        print("GITHUB_OUTPUT not defined!")
+        return
+
+    with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as f:
+        f.write(f"run-ci-fuzz={bool_lower(outputs.run_ci_fuzz)}\n")
+        f.write(f"run-docs={bool_lower(outputs.run_docs)}\n")
+        f.write(f"run-tests={bool_lower(outputs.run_tests)}\n")
+        f.write(f"run-windows-msi={bool_lower(outputs.run_windows_msi)}\n")
+
+
+def bool_lower(value: bool, /) -> str:
+    return "true" if value else "false"
+
+
+if __name__ == "__main__":
+    compute_changes()