GH-136895: Update JIT builds to use LLVM 20 (#140329)

Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
This commit is contained in:
Savannah Ostrowski 2025-11-03 10:01:44 -08:00 committed by GitHub
parent b373d3494c
commit 4e2ff4ac4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 151 additions and 50 deletions

View file

@ -68,7 +68,7 @@ jobs:
- true - true
- false - false
llvm: llvm:
- 19 - 20
include: include:
- target: i686-pc-windows-msvc/msvc - target: i686-pc-windows-msvc/msvc
architecture: Win32 architecture: Win32
@ -138,7 +138,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
llvm: llvm:
- 19 - 20
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@ -166,7 +166,7 @@ jobs:
fail-fast: false fail-fast: false
matrix: matrix:
llvm: llvm:
- 19 - 20
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:

View file

@ -0,0 +1 @@
Update JIT compilation to use LLVM 20 at build time.

View file

@ -3,6 +3,7 @@
import argparse import argparse
import os import os
import pathlib import pathlib
import shutil
import sys import sys
import time import time
import urllib.error import urllib.error
@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
) )
except (urllib.error.URLError, ConnectionError) as ex: except (urllib.error.URLError, ConnectionError) as ex:
if attempt == max_retries: if attempt == max_retries:
msg = f"Download from {download_location} failed." raise OSError(f'Download from {download_location} failed.') from ex
raise OSError(msg) from ex
time.sleep(2.25**attempt) time.sleep(2.25**attempt)
else: else:
return resp return resp
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose): def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
repo = f'cpython-{"bin" if binary else "source"}-deps' repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip' url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
reporthook = None reporthook = None
if verbose: if verbose:
@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
return filename return filename
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``{tag}.tar.xz`` release asset from cpython-bin-deps.

    The asset is fetched from the GitHub release named *tag* in the *org*
    organization and stored in *tarball_dir*, which is created (including
    parents) when it does not exist yet.  When *verbose* is true, ``print``
    is handed to the downloader as its report hook; otherwise no hook is
    used.

    Returns the path of the downloaded tarball.
    """
    tarball_dir.mkdir(parents=True, exist_ok=True)
    destination = tarball_dir / f'{tag}.tar.xz'
    release_url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
    progress_hook = print if verbose else None
    retrieve_with_retries(release_url, destination, progress_hook)
    return destination
def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack *tarball_path* into ``externals_dir / tag`` and return that path.

    The archive is extracted directly into its final location.  Callers may
    treat the existence of that directory as "dependency already present",
    so a failed or interrupted extraction must not leave a partial tree
    behind: if unpacking raises, a directory created by this call is removed
    before the exception is re-raised.  A directory that already existed
    before the call is never deleted.
    """
    output_path = externals_dir / tag
    existed_before = output_path.exists()
    try:
        shutil.unpack_archive(os.fspath(tarball_path), os.fspath(output_path))
    except BaseException:
        # BaseException so that Ctrl-C in the middle of a large extraction is
        # cleaned up as well; the original exception always propagates.
        if not existed_before:
            shutil.rmtree(output_path, ignore_errors=True)
        raise
    return output_path
def extract_zip(externals_dir, zip_path): def extract_zip(externals_dir, zip_path):
with zipfile.ZipFile(os.fspath(zip_path)) as zf: with zipfile.ZipFile(os.fspath(zip_path)) as zf:
zf.extractall(os.fspath(externals_dir)) zf.extractall(os.fspath(externals_dir))
@ -55,6 +71,8 @@ def parse_args():
p.add_argument('-v', '--verbose', action='store_true') p.add_argument('-v', '--verbose', action='store_true')
p.add_argument('-b', '--binary', action='store_true', p.add_argument('-b', '--binary', action='store_true',
help='Is the dependency in the binary repo?') help='Is the dependency in the binary repo?')
p.add_argument('-r', '--release', action='store_true',
help='Download from GitHub release assets instead of branch')
p.add_argument('-O', '--organization', p.add_argument('-O', '--organization',
help='Organization owning the deps repos', default='python') help='Organization owning the deps repos', default='python')
p.add_argument('-e', '--externals-dir', type=pathlib.Path, p.add_argument('-e', '--externals-dir', type=pathlib.Path,
@ -67,15 +85,36 @@ def parse_args():
def main(): def main():
args = parse_args() args = parse_args()
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
final_name = args.externals_dir / args.tag final_name = args.externals_dir / args.tag
extracted = extract_zip(args.externals_dir, zip_path)
# Check if the dependency already exists in externals/ directory
# (either already downloaded/extracted, or checked into the git tree)
if final_name.exists():
if args.verbose:
print(f'{args.tag} already exists at {final_name}, skipping download.')
return
# Determine download method: release artifacts for large deps (like LLVM),
# otherwise zip download from GitHub branches
if args.release:
tarball_path = fetch_release(
args.tag,
args.externals_dir / 'tarballs',
org=args.organization,
verbose=args.verbose,
)
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
else:
# Use zip download from GitHub branches
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
extracted = extract_zip(args.externals_dir, zip_path)
for wait in [1, 2, 3, 5, 8, 0]: for wait in [1, 2, 3, 5, 8, 0]:
try: try:
extracted.replace(final_name) extracted.replace(final_name)

View file

@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18 if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0 if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06 if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0 if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
for %%b in (%binaries%) do ( for %%b in (%binaries%) do (
if exist "%EXTERNALS_DIR%\%%b" ( if exist "%EXTERNALS_DIR%\%%b" (
@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b" git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
) else ( ) else (
echo.Fetching %%b... echo.Fetching %%b...
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b if "%%b"=="llvm-20.1.8.0" (
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
) else (
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
)
) )
) )

View file

@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
} }
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state); void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
#include "jit_stencils.h" #include "jit_stencils.h"
#if defined(__aarch64__) || defined(_M_ARM64) #if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16 #define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8 #define DATA_ALIGN 8
#elif defined(__x86_64__) && defined(__APPLE__)
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
// range.
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
#define DATA_ALIGN 8
#else #else
#define TRAMPOLINE_SIZE 0 #define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1 #define DATA_ALIGN 1
#endif #endif
// Get the trampoline memory location for a given symbol ordinal.
//
// state->trampolines.mask is a bitmap stored as an array of uint32_t words:
// bit (ordinal % 32) of word (ordinal / 32) must be set for the symbol
// (asserted below). The slot index is the number of set bits that precede
// the symbol's own bit, and each slot is TRAMPOLINE_SIZE bytes wide.
static unsigned char *
get_trampoline_slot(int ordinal, jit_state *state)
{
    // Shift an unsigned 32-bit one: with a plain int, `1 << 31` (reached when
    // ordinal % 32 == 31) overflows a signed int, which is undefined behavior.
    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
    assert(symbol_mask & trampoline_mask);

    // Count the number of set bits in the trampoline mask lower than ordinal:
    // first within the symbol's own word, then in every preceding word.
    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
    for (int i = 0; i < ordinal / 32; i++) {
        index += _Py_popcount32(state->trampolines.mask[i]);
    }

    unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
    // The selected slot must lie entirely inside the trampoline region.
    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
    return trampoline;
}
// Generate and patch AArch64 trampolines. The symbols to jump to are stored // Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map. // in the jit_stencils.h in the symbols_map.
void void
@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
return; return;
} }
// Masking is done modulo 32 as the mask is stored as an array of uint32_t // Out of range - need a trampoline
const uint32_t symbol_mask = 1 << (ordinal % 32); uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);
// Count the number of set bits in the trampoline mask lower than ordinal,
// this gives the index into the array of trampolines.
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
/* Generate the trampoline /* Generate the trampoline
@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
patch_aarch64_26r(location, (uintptr_t)p); patch_aarch64_26r(location, (uintptr_t)p);
} }
// Generate and patch x86_64 trampolines.
//
// The target is symbols_map[ordinal]. When it is reachable from `location`
// with a signed 32-bit displacement, the call site is patched to reach it
// directly; otherwise a trampoline is written into this symbol's slot and
// the call site is patched to reach the trampoline instead.
void
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
{
    const uint64_t target = (uintptr_t)symbols_map[ordinal];
    const int64_t displacement = (int64_t)target - 4 - (int64_t)location;

    if (displacement < -(1LL << 31) || displacement >= (1LL << 31)) {
        // Out of signed 32-bit range - go through a trampoline.
        unsigned char *slot = get_trampoline_slot(ordinal, state);

        /* Trampoline layout (14 bytes, padded to 16):
               0: ff 25 00 00 00 00        jmp *(%rip)
               6: XX XX XX XX XX XX XX XX  (64-bit target address)

           Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
        */
        static const unsigned char jmp_rip_indirect[6] = {
            0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
        };
        memcpy(slot, jmp_rip_indirect, sizeof(jmp_rip_indirect));
        memcpy(slot + sizeof(jmp_rip_indirect), &target, sizeof(target));

        // Patch the call site to call the trampoline instead
        patch_32r(location, (uintptr_t)slot - 4);
        return;
    }

    // In range of 32 signed bits: patch the call site directly.
    patch_32r(location, target - 4);
}
static void static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest) combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{ {

View file

@ -9,32 +9,32 @@ ## Installing LLVM
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. LLVM version 20 is the officially supported version. You can modify it if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
It's easy to install all of the required tools: It's easy to install all of the required tools:
### Linux ### Linux
Install LLVM 19 on Ubuntu/Debian: Install LLVM 20 on Ubuntu/Debian:
```sh ```sh
wget https://apt.llvm.org/llvm.sh wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh chmod +x llvm.sh
sudo ./llvm.sh 19 sudo ./llvm.sh 20
``` ```
Install LLVM 19 on Fedora Linux 40 or newer: Install LLVM 20 on Fedora Linux 40 or newer:
```sh ```sh
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19' sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
``` ```
### macOS ### macOS
Install LLVM 19 with [Homebrew](https://brew.sh): Install LLVM 20 with [Homebrew](https://brew.sh):
```sh ```sh
brew install llvm@19 brew install llvm@20
``` ```
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
@ -43,18 +43,18 @@ ### Windows
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`. LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Alternatively, you can use [chocolatey](https://chocolatey.org): Alternatively, you can use [chocolatey](https://chocolatey.org):
```sh ```sh
choco install llvm --version=19.1.0 choco install llvm --version=20.1.8
``` ```
### Dev Containers ### Dev Containers
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box. need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
## Building ## Building

View file

@ -11,8 +11,8 @@
import _targets import _targets
_LLVM_VERSION = "19" _LLVM_VERSION = "20"
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" _EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
_P = typing.ParamSpec("_P") _P = typing.ParamSpec("_P")
_R = typing.TypeVar("_R") _R = typing.TypeVar("_R")

View file

@ -253,6 +253,23 @@ def process_relocations(self, known_symbols: dict[str, int]) -> None:
self._trampolines.add(ordinal) self._trampolines.add(ordinal)
hole.addend = ordinal hole.addend = ordinal
hole.symbol = None hole.symbol = None
# x86_64 Darwin trampolines for external symbols
elif (
hole.kind == "X86_64_RELOC_BRANCH"
and hole.value is HoleValue.ZERO
and hole.symbol not in self.symbols
):
hole.func = "patch_x86_64_trampoline"
hole.need_state = True
assert hole.symbol is not None
if hole.symbol in known_symbols:
ordinal = known_symbols[hole.symbol]
else:
ordinal = len(known_symbols)
known_symbols[hole.symbol] = ordinal
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
self.data.pad(8) self.data.pad(8)
for stencil in [self.code, self.data]: for stencil in [self.code, self.data]:
for hole in stencil.holes: for hole in stencil.holes:

View file

@ -166,10 +166,6 @@ async def _compile(
"-fno-asynchronous-unwind-tables", "-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch: # Don't call built-in functions that we can't find or patch:
"-fno-builtin", "-fno-builtin",
# Emit relaxable 64-bit calls/jumps, so we don't have to worry about
# about emitting in-range trampolines for out-of-range targets.
# We can probably remove this and emit trampolines in the future:
"-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch: # Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector", "-fno-stack-protector",
"-std=c11", "-std=c11",
@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"aarch64-pc-windows-msvc", host): elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
host = "aarch64-pc-windows-msvc" host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)" condition = "defined(_M_ARM64)"
args = ["-fms-runtime-lib=dll", "-fplt"] args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerAArch64 optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer) target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host): elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu" host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)" condition = "defined(__aarch64__) && defined(__linux__)"
# -mno-outline-atomics: Keep intrinsics from being emitted. # -mno-outline-atomics: Keep intrinsics from being emitted.
args = ["-fpic", "-mno-outline-atomics"] args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
optimizer = _optimizers.OptimizerAArch64 optimizer = _optimizers.OptimizerAArch64
target = _ELF(host, condition, args=args, optimizer=optimizer) target = _ELF(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"i686-pc-windows-msvc", host): elif re.fullmatch(r"i686-pc-windows-msvc", host):
@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"x86_64-.*-linux-gnu", host): elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
host = "x86_64-unknown-linux-gnu" host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)" condition = "defined(__x86_64__) && defined(__linux__)"
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"] args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
optimizer = _optimizers.OptimizerX86 optimizer = _optimizers.OptimizerX86
target = _ELF(host, condition, args=args, optimizer=optimizer) target = _ELF(host, condition, args=args, optimizer=optimizer)
else: else: