gh-138497: Support LLVM_VERSION configuration via env (#138498)

Co-authored-by: Savannah Ostrowski <savannah@python.org>
This commit is contained in:
danigm 2025-10-07 16:54:31 +02:00 committed by GitHub
parent 7094f09f54
commit 96c59a6e42
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 56 additions and 25 deletions

View file

@ -0,0 +1,4 @@
The LLVM version used to build the JIT can now be changed by setting the
``LLVM_VERSION`` environment variable when running ``configure``. Use this at
your own risk, as only one LLVM version is officially supported. For more
information, see ``Tools/jit/README.md``.

View file

@ -9,7 +9,7 @@ ## Installing LLVM
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
LLVM version 19 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. LLVM version 19 is the officially supported version. If needed, you can select a different version by setting the `LLVM_VERSION` environment variable when running `configure`. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
It's easy to install all of the required tools: It's easy to install all of the required tools:

View file

@ -10,8 +10,8 @@
import _targets import _targets
_LLVM_VERSION = 19
_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\S*\s+") _LLVM_VERSION = "19"
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0" _EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
_P = typing.ParamSpec("_P") _P = typing.ParamSpec("_P")
@ -56,53 +56,66 @@ async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str
@_async_cache
async def _check_tool_version(
    name: str, llvm_version: str, *, echo: bool = False
) -> bool:
    """Check whether ``name --version`` reports the requested LLVM version.

    Args:
        name: Executable name or path to probe.
        llvm_version: Leading version component(s) to look for (e.g. "19").
            The output must contain ``version <llvm_version>.<N>.<N>``.
        echo: Forwarded to ``_run``.

    Returns:
        True if the tool produced output matching the version pattern,
        False otherwise (including when ``_run`` yields a falsy result).
    """
    output = await _run(name, ["--version"], echo=echo)
    # The version is caller-supplied (it can come from --llvm-version), so it
    # must be matched literally: escape it so that characters such as "." are
    # not interpreted as regex metacharacters.
    version_pattern = re.compile(
        rf"version\s+{re.escape(llvm_version)}\.\d+\.\d+\S*\s+"
    )
    return bool(output and version_pattern.search(output))
@_async_cache
async def _get_brew_llvm_prefix(llvm_version: str, *, echo: bool = False) -> str | None:
    """Ask Homebrew for the install prefix of ``llvm@<llvm_version>``.

    Returns the output of ``brew --prefix`` with one trailing newline removed.
    A falsy result from ``_run`` is returned unchanged.
    """
    brew_output = await _run("brew", ["--prefix", f"llvm@{llvm_version}"], echo=echo)
    if not brew_output:
        return brew_output
    return brew_output.removesuffix("\n")
@_async_cache
async def _find_tool(tool: str, llvm_version: str, *, echo: bool = False) -> str | None:
    """Find an executable for *tool* that reports LLVM *llvm_version*.

    Candidates are probed in order, stopping at the first one that passes
    ``_check_tool_version``: the bare tool name, the version-suffixed name,
    the PCbuild externals directory, and finally the Homebrew prefix.
    Returns the matching name/path, or None if no candidate matches.
    """
    # Unversioned, then versioned, executables on the search path:
    for candidate in (tool, f"{tool}-{llvm_version}"):
        if await _check_tool_version(candidate, llvm_version, echo=echo):
            return candidate
    # PCbuild externals:
    externals_dir = os.environ.get("EXTERNALS_DIR", _targets.EXTERNALS)
    candidate = os.path.join(externals_dir, _EXTERNALS_LLVM_TAG, "bin", tool)
    if await _check_tool_version(candidate, llvm_version, echo=echo):
        return candidate
    # Homebrew-installed executables (only queried once everything else failed):
    brew_prefix = await _get_brew_llvm_prefix(llvm_version, echo=echo)
    if brew_prefix is None:
        return None
    candidate = os.path.join(brew_prefix, "bin", tool)
    if await _check_tool_version(candidate, llvm_version, echo=echo):
        return candidate
    # Nothing found:
    return None
async def maybe_run(
    tool: str,
    args: typing.Iterable[str],
    echo: bool = False,
    llvm_version: str = _LLVM_VERSION,
) -> str | None:
    """Run an LLVM tool if it can be found. Otherwise, return None.

    The tool is located with ``_find_tool`` for the given *llvm_version*;
    when a path is found, the result of ``_run`` on that path is returned.
    """
    tool_path = await _find_tool(tool, llvm_version, echo=echo)
    if not tool_path:
        return tool_path
    return await _run(tool_path, args, echo=echo)
async def run(
    tool: str,
    args: typing.Iterable[str],
    echo: bool = False,
    llvm_version: str = _LLVM_VERSION,
) -> str:
    """Run an LLVM tool if it can be found. Otherwise, raise RuntimeError.

    Delegates to ``maybe_run`` and treats a None result as "tool not found";
    the error message names the tool and the requested LLVM version.
    """
    result = await maybe_run(tool, args, echo=echo, llvm_version=llvm_version)
    if result is not None:
        return result
    raise RuntimeError(f"Can't find {tool}-{llvm_version}!")

View file

@ -50,6 +50,7 @@ class _Target(typing.Generic[_S, _R]):
debug: bool = False debug: bool = False
verbose: bool = False verbose: bool = False
cflags: str = "" cflags: str = ""
llvm_version: str = _llvm._LLVM_VERSION
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve() pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve()
@ -81,7 +82,9 @@ def _compute_digest(self) -> str:
async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
group = _stencils.StencilGroup() group = _stencils.StencilGroup()
args = ["--disassemble", "--reloc", f"{path}"] args = ["--disassemble", "--reloc", f"{path}"]
output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) output = await _llvm.maybe_run(
"llvm-objdump", args, echo=self.verbose, llvm_version=self.llvm_version
)
if output is not None: if output is not None:
# Make sure that full paths don't leak out (for reproducibility): # Make sure that full paths don't leak out (for reproducibility):
long, short = str(path), str(path.name) long, short = str(path), str(path.name)
@ -99,7 +102,9 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
"--sections", "--sections",
f"{path}", f"{path}",
] ]
output = await _llvm.run("llvm-readobj", args, echo=self.verbose) output = await _llvm.run(
"llvm-readobj", args, echo=self.verbose, llvm_version=self.llvm_version
)
# --elf-output-style=JSON is only *slightly* broken on Mach-O... # --elf-output-style=JSON is only *slightly* broken on Mach-O...
output = output.replace("PrivateExtern\n", "\n") output = output.replace("PrivateExtern\n", "\n")
output = output.replace("Extern\n", "\n") output = output.replace("Extern\n", "\n")
@ -175,12 +180,16 @@ async def _compile(
# Allow user-provided CFLAGS to override any defaults # Allow user-provided CFLAGS to override any defaults
*shlex.split(self.cflags), *shlex.split(self.cflags),
] ]
await _llvm.run("clang", args_s, echo=self.verbose) await _llvm.run(
"clang", args_s, echo=self.verbose, llvm_version=self.llvm_version
)
self.optimizer( self.optimizer(
s, label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix s, label_prefix=self.label_prefix, symbol_prefix=self.symbol_prefix
).run() ).run()
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"] args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
await _llvm.run("clang", args_o, echo=self.verbose) await _llvm.run(
"clang", args_o, echo=self.verbose, llvm_version=self.llvm_version
)
return await self._parse(o) return await self._parse(o)
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
@ -224,6 +233,8 @@ def build(
if not self.stable: if not self.stable:
warning = f"JIT support for {self.triple} is still experimental!" warning = f"JIT support for {self.triple} is still experimental!"
request = "Please report any issues you encounter.".center(len(warning)) request = "Please report any issues you encounter.".center(len(warning))
if self.llvm_version != _llvm._LLVM_VERSION:
request = f"Warning! Building with an LLVM version other than {_llvm._LLVM_VERSION} is not supported."
outline = "=" * len(warning) outline = "=" * len(warning)
print("\n".join(["", outline, warning, request, outline, ""])) print("\n".join(["", outline, warning, request, outline, ""]))
digest = f"// {self._compute_digest()}\n" digest = f"// {self._compute_digest()}\n"

View file

@ -42,6 +42,7 @@
parser.add_argument( parser.add_argument(
"--cflags", help="additional flags to pass to the compiler", default="" "--cflags", help="additional flags to pass to the compiler", default=""
) )
parser.add_argument("--llvm-version", help="LLVM version to use")
args = parser.parse_args() args = parser.parse_args()
for target in args.target: for target in args.target:
target.debug = args.debug target.debug = args.debug
@ -49,6 +50,8 @@
target.verbose = args.verbose target.verbose = args.verbose
target.cflags = args.cflags target.cflags = args.cflags
target.pyconfig_dir = args.pyconfig_dir target.pyconfig_dir = args.pyconfig_dir
if args.llvm_version:
target.llvm_version = args.llvm_version
target.build( target.build(
comment=comment, comment=comment,
force=args.force, force=args.force,

2
configure generated vendored
View file

@ -10875,7 +10875,7 @@ then :
else case e in #( else case e in #(
e) as_fn_append CFLAGS_NODIST " $jit_flags" e) as_fn_append CFLAGS_NODIST " $jit_flags"
REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\"" REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\" --llvm-version=\"$LLVM_VERSION\""
if test "x$Py_DEBUG" = xtrue if test "x$Py_DEBUG" = xtrue
then : then :
as_fn_append REGEN_JIT_COMMAND " --debug" as_fn_append REGEN_JIT_COMMAND " --debug"

View file

@ -2786,7 +2786,7 @@ AS_VAR_IF([jit_flags],
[], [],
[AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"]) [AS_VAR_APPEND([CFLAGS_NODIST], [" $jit_flags"])
AS_VAR_SET([REGEN_JIT_COMMAND], AS_VAR_SET([REGEN_JIT_COMMAND],
["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\""]) ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py ${ARCH_TRIPLES:-$host} --output-dir . --pyconfig-dir . --cflags=\"$CFLAGS_JIT\" --llvm-version=\"$LLVM_VERSION\""])
AS_VAR_IF([Py_DEBUG], AS_VAR_IF([Py_DEBUG],
[true], [true],
[AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])], [AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])],