GH-126910: Build/link the JIT shim in the Python interpreter (#148872)

This commit is contained in:
Diego Russo 2026-04-23 12:23:18 +01:00 committed by GitHub
parent 29917d51ab
commit 9633c5239d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 202 additions and 209 deletions

View file

@ -121,18 +121,11 @@ _PyEval_EvalFrame(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwfl
}
/* Tier 2 entry points.  When _Py_TIER2 is defined exactly one of the two
 * prototypes below is visible: _Py_LazyJitShim for _Py_JIT builds and
 * _PyTier2Interpreter otherwise.  Both take the current executor, the frame,
 * the stack pointer and the thread state, and return a _Py_CODEUNIT pointer. */
#ifdef _Py_TIER2
#ifdef _Py_JIT
_Py_CODEUNIT *_Py_LazyJitShim(
struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
_PyStackRef *stack_pointer, PyThreadState *tstate
);
#else
_Py_CODEUNIT *_PyTier2Interpreter(
struct _PyExecutorObject *current_executor, _PyInterpreterFrame *frame,
_PyStackRef *stack_pointer, PyThreadState *tstate
);
#endif
#endif
/* Entry function pointer; only declared here (extern), the definition lives
 * in another translation unit. */
extern _PyJitEntryFuncPtr _Py_jit_entry;

View file

@ -23,9 +23,13 @@ typedef _Py_CODEUNIT *(*jit_func)(
_PyStackRef _tos_cache0, _PyStackRef _tos_cache1, _PyStackRef _tos_cache2
);
/* Entry function with the same parameter list as the other tier 2 entry
 * points: executor, frame, stack pointer and thread state. */
_Py_CODEUNIT *_PyJIT(
_PyExecutorObject *executor, _PyInterpreterFrame *frame,
_PyStackRef *stack_pointer, PyThreadState *tstate
);
/* Compile `length` uop instructions from `trace` for `executor`. */
int _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
/* Free the executor memory that _PyJIT_Compile allocated. */
void _PyJIT_Free(_PyExecutorObject *executor);
/* Release the memory of the lazily compiled entry shim. */
void _PyJIT_Fini(void);
/* Non-zero when `addr` lies inside JIT code owned by `interp`;
 * 0 when `interp` is NULL. */
PyAPI_FUNC(int) _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr);
#endif // _Py_JIT

View file

@ -290,6 +290,7 @@ LDLIBRARYDIR= @LDLIBRARYDIR@
INSTSONAME= @INSTSONAME@
LIBRARY_DEPS= @LIBRARY_DEPS@
LINK_PYTHON_DEPS=@LINK_PYTHON_DEPS@
# JIT shim object chosen by configure (empty when no shim is selected); it is
# appended to PYTHON_OBJS through $(JIT_OBJS).
JIT_OBJS= @JIT_SHIM_O@
PY_ENABLE_SHARED= @PY_ENABLE_SHARED@
STATIC_LIBPYTHON= @STATIC_LIBPYTHON@
@ -469,6 +470,7 @@ PYTHON_OBJS= \
Python/instruction_sequence.o \
Python/intrinsics.o \
Python/jit.o \
$(JIT_OBJS) \
Python/legacy_tracing.o \
Python/lock.o \
Python/marshal.o \
@ -3204,21 +3206,37 @@ Python/emscripten_trampoline_inner.wasm: $(srcdir)/Python/emscripten_trampoline_
Python/emscripten_trampoline_wasm.c: Python/emscripten_trampoline_inner.wasm
$(PYTHON_FOR_REGEN) $(srcdir)/Platforms/emscripten/prepare_external_wasm.py $< $@ getWasmTrampolineModule
# JIT_SHIM_BUILD_OBJS: shim object(s) substituted by configure.
# JIT_BUILD_TARGETS:   the umbrella header, the per-target stencil headers and
#                      the shim build objects.
# JIT_TARGETS:         JIT_BUILD_TARGETS plus any entry of JIT_OBJS that is not
#                      itself one of the shim build objects.
# JIT_GENERATED_STAMP: marker file touched after a regeneration run.
# JIT_DEPS:            inputs whose change invalidates the generated files.
JIT_SHIM_BUILD_OBJS= @JIT_SHIM_BUILD_O@
JIT_BUILD_TARGETS= jit_stencils.h @JIT_STENCILS_H@ $(JIT_SHIM_BUILD_OBJS)
JIT_TARGETS= $(JIT_BUILD_TARGETS) $(filter-out $(JIT_SHIM_BUILD_OBJS),$(JIT_OBJS))
JIT_GENERATED_STAMP= .jit-stamp
JIT_DEPS = \
$(srcdir)/Tools/jit/*.c \
$(srcdir)/Tools/jit/*.h \
$(srcdir)/Tools/jit/*.py \
$(srcdir)/Python/executor_cases.c.h \
pyconfig.h
# Rule header keyed directly on the JIT inputs for the stencil headers.
jit_stencils.h @JIT_STENCILS_H@: $(JIT_DEPS)
# Stamp rule: run the configured regeneration command when any JIT input is
# newer than the stamp, then refresh the stamp.
$(JIT_GENERATED_STAMP): $(JIT_DEPS)
@REGEN_JIT_COMMAND@
@touch $@
# Every generated JIT file depends on the stamp.  When a target is missing
# although the stamp exists, drop the stamp, rebuild it with a sub-make and
# fail unless the target exists afterwards.
$(JIT_BUILD_TARGETS): $(JIT_GENERATED_STAMP)
@if test ! -f "$@"; then \
rm -f $(JIT_GENERATED_STAMP); \
$(MAKE) $(JIT_GENERATED_STAMP); \
test -f "$@"; \
fi
# Combine the aarch64 and x86_64 shim objects into one universal2 object
# with lipo.
jit_shim-universal2-apple-darwin.o: jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o
lipo -create -output $@ jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o
# Python/jit.o is rebuilt when jit.c or any configured stencil header changes;
# only the first prerequisite (jit.c) is passed to the compiler.
Python/jit.o: $(srcdir)/Python/jit.c @JIT_STENCILS_H@
$(CC) -c $(PY_CORE_CFLAGS) -o $@ $<
# regen-jit is a phony convenience target for the JIT outputs.
.PHONY: regen-jit
regen-jit:
@REGEN_JIT_COMMAND@
# Prerequisite-only form: building regen-jit brings $(JIT_TARGETS) up to date.
regen-jit: $(JIT_TARGETS)
# Some versions of make put the object file in the current directory
.c.o:
@ -3342,7 +3360,7 @@ clean-profile: clean-retain-profile clean-bolt
# gh-141808: The JIT stencils are deliberately kept in clean-profile
# Delete generated JIT files.  The leading '-' tells make to carry on even if
# rm reports an error.
.PHONY: clean-jit-stencils
clean-jit-stencils:
-rm -f jit_stencils*.h
-rm -f $(JIT_TARGETS) $(JIT_GENERATED_STAMP) jit_stencils*.h jit_shim*.o
.PHONY: clean
clean: clean-profile clean-jit-stencils

View file

@ -12,8 +12,9 @@
<IntDir>$(IntDir.Replace(`\\`, `\`))</IntDir>
<GeneratedFrozenModulesDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_frozen\</GeneratedFrozenModulesDir>
<GeneratedZlibNgDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)$(ArchName)_$(Configuration)\zlib-ng\</GeneratedZlibNgDir>
<!-- Directory that receives the generated JIT files.  The PGUpdate
     configuration points at the PGInstrument directory instead of its own.
     The final definition collapses any doubled backslash into a single one. -->
<GeneratedJitStencilsDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)</GeneratedJitStencilsDir>
<GeneratedJitStencilsDir Condition="$(Configuration) == 'PGUpdate'">$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument</GeneratedJitStencilsDir>
<GeneratedJitStencilsDir>$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_$(Configuration)\</GeneratedJitStencilsDir>
<GeneratedJitStencilsDir Condition="$(Configuration) == 'PGUpdate'">$(Py_IntDir)\$(MajorVersionNumber)$(MinorVersionNumber)_PGInstrument\</GeneratedJitStencilsDir>
<GeneratedJitStencilsDir>$(GeneratedJitStencilsDir.Replace(`\\`, `\`))</GeneratedJitStencilsDir>
<TargetName Condition="'$(TargetName)' == ''">$(ProjectName)</TargetName>
<TargetName>$(TargetName)$(PyDebugExt)</TargetName>
<GenerateManifest>false</GenerateManifest>

View file

@ -115,6 +115,9 @@
<!-- Linker inputs.  When UseJIT is 'true', the jit_shim object matching the
     current Platform (ARM64, Win32 or x64) is taken from
     GeneratedJitStencilsDir and prepended to the dependency list. -->
<Link>
<AdditionalDependencies>version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="$(IncludeExternals)">zlib-ng$(PyDebugExt).lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'ARM64'">$(GeneratedJitStencilsDir)jit_shim-aarch64-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'Win32'">$(GeneratedJitStencilsDir)jit_shim-i686-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(UseJIT)' == 'true' and $(Platform) == 'x64'">$(GeneratedJitStencilsDir)jit_shim-x86_64-pc-windows-msvc.o;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>

View file

@ -35,6 +35,9 @@
<!-- Files produced by the JIT build for the current Platform: one stencil
     header and one shim object, both located in GeneratedJitStencilsDir. -->
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-aarch64-pc-windows-msvc.h" Condition="$(Platform) == 'ARM64'"/>
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-i686-pc-windows-msvc.h" Condition="$(Platform) == 'Win32'"/>
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_stencils-x86_64-pc-windows-msvc.h" Condition="$(Platform) == 'x64'"/>
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-aarch64-pc-windows-msvc.o" Condition="$(Platform) == 'ARM64'"/>
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-i686-pc-windows-msvc.o" Condition="$(Platform) == 'Win32'"/>
<_JITOutputs Include="$(GeneratedJitStencilsDir)jit_shim-x86_64-pc-windows-msvc.o" Condition="$(Platform) == 'x64'"/>
<_CasesSources Include="$(PySourcePath)Python\bytecodes.c;$(PySourcePath)Python\optimizer_bytecodes.c;"/>
<_CasesOutputs Include="$(PySourcePath)Python\generated_cases.c.h;$(PySourcePath)Include\opcode_ids.h;$(PySourcePath)Include\internal\pycore_uop_ids.h;$(PySourcePath)Python\opcode_targets.h;$(PySourcePath)Include\internal\pycore_opcode_metadata.h;$(PySourcePath)Include\internal\pycore_uop_metadata.h;$(PySourcePath)Python\optimizer_cases.c.h;$(PySourcePath)Lib\_opcode_metadata.py"/>
<_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" />
@ -129,7 +132,7 @@
<JITArgs Condition="$(Platform) == 'x64'">x86_64-pc-windows-msvc</JITArgs>
<JITArgs Condition="$(Configuration) == 'Debug'">$(JITArgs) --debug</JITArgs>
</PropertyGroup>
<Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs) --output-dir "$(GeneratedJitStencilsDir)" --pyconfig-dir "$(PySourcePath)PC" --llvm-version="$(LLVM_VERSION)" --llvm-tools-install-dir="$(LLVM_TOOLS_INSTALL_DIR)"'/>
<Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\jit\build.py" $(JITArgs) --output-dir "$(GeneratedJitStencilsDir.TrimEnd(`\`))" --pyconfig-dir "$(PySourcePath)PC" --llvm-version="$(LLVM_VERSION)" --llvm-tools-install-dir="$(LLVM_TOOLS_INSTALL_DIR)"'/>
</Target>
<Target Name="_CleanJIT" AfterTargets="Clean">
<Delete Files="@(_JITOutputs)"/>

View file

@ -1305,7 +1305,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int
}
#ifdef _Py_TIER2
/* Initial value of the tier 2 entry pointer: a JIT entry function in _Py_JIT
 * builds, the tier 2 interpreter otherwise. */
#ifdef _Py_JIT
_PyJitEntryFuncPtr _Py_jit_entry = _Py_LazyJitShim;
_PyJitEntryFuncPtr _Py_jit_entry = _PyJIT;
#else
_PyJitEntryFuncPtr _Py_jit_entry = _PyTier2Interpreter;
#endif

View file

@ -60,8 +60,6 @@ jit_error(const char *message)
PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint);
}
static size_t _Py_jit_shim_size = 0;
static int
address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t addr)
{
@ -104,13 +102,6 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr)
if (interp == NULL) {
return 0;
}
if (_Py_jit_entry != _Py_LazyJitShim && _Py_jit_shim_size != 0) {
uintptr_t start = (uintptr_t)_Py_jit_entry;
uintptr_t end = start + _Py_jit_shim_size;
if (addr >= start && addr < end) {
return 1;
}
}
if (address_in_executor_array(interp->executor_ptrs, interp->executor_count, addr)) {
return 1;
}
@ -727,75 +718,6 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz
return 0;
}
/* One-off compilation of the jit entry shim
 * We compile this once only as it is effectively a normal
 * function, but we need to use the JIT because it needs
 * to understand the jit-specific calling convention.
 * Don't forget to call _PyJIT_Fini later!
 *
 * Returns the entry point of the emitted shim, or NULL when the allocation
 * or the switch to executable memory fails.  On success the size of the
 * allocation is stored in _Py_jit_shim_size.
 */
static _PyJitEntryFuncPtr
compile_shim(void)
{
_PyExecutorObject dummy;
const StencilGroup *group;
size_t code_size = 0;
size_t data_size = 0;
jit_state state = {0};
group = &shim;
code_size += group->code_size;
data_size += group->data_size;
combine_symbol_mask(group->trampoline_mask, state.trampolines.mask);
combine_symbol_mask(group->got_mask, state.got_symbols.mask);
// Round up to the nearest page:
size_t page_size = get_page_size();
// The masking below relies on page_size being a power of two.
assert((page_size & (page_size - 1)) == 0);
size_t code_padding = DATA_ALIGN - ((code_size + state.trampolines.size) & (DATA_ALIGN - 1));
size_t padding = page_size - ((code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size) & (page_size - 1));
size_t total_size = code_size + state.trampolines.size + code_padding + data_size + state.got_symbols.size + padding;
unsigned char *memory = jit_alloc(total_size);
if (memory == NULL) {
return NULL;
}
// Layout inside the allocation, in order: code, trampolines, code padding,
// data, GOT symbols.
unsigned char *code = memory;
state.trampolines.mem = memory + code_size;
unsigned char *data = memory + code_size + state.trampolines.size + code_padding;
state.got_symbols.mem = data + data_size;
// Compile the shim, which handles converting between the native
// calling convention and the calling convention used by jitted code
// (which may be different for efficiency reasons).
group = &shim;
group->emit(code, data, &dummy, NULL, &state);
code += group->code_size;
data += group->data_size;
// Both cursors must have advanced exactly to the end of their regions.
assert(code == memory + code_size);
assert(data == memory + code_size + state.trampolines.size + code_padding + data_size);
if (mark_executable(memory, total_size)) {
// Could not make the memory executable: release it and report failure.
jit_free(memory, total_size);
return NULL;
}
_Py_jit_shim_size = total_size;
return (_PyJitEntryFuncPtr)memory;
}
// Guards the one-time replacement of _Py_jit_entry (also taken by _PyJIT_Fini).
static PyMutex lazy_jit_mutex = { 0 };
/* Lazy entry point: while _Py_jit_entry still points at this function, the
 * first caller compiles the shim under lazy_jit_mutex and installs it.  The
 * call is then forwarded to whatever _Py_jit_entry points at.  A failed
 * compilation is fatal.
 */
_Py_CODEUNIT *
_Py_LazyJitShim(
_PyExecutorObject *executor, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
) {
PyMutex_Lock(&lazy_jit_mutex);
// Re-check under the lock: another caller may already have installed the shim.
if (_Py_jit_entry == _Py_LazyJitShim) {
_PyJitEntryFuncPtr shim = compile_shim();
if (shim == NULL) {
PyMutex_Unlock(&lazy_jit_mutex);
Py_FatalError("Cannot allocate core JIT code");
}
_Py_jit_entry = shim;
}
PyMutex_Unlock(&lazy_jit_mutex);
return _Py_jit_entry(executor, frame, stack_pointer, tstate);
}
// Free executor's memory allocated with _PyJIT_Compile
void
_PyJIT_Free(_PyExecutorObject *executor)
@ -812,22 +734,4 @@ _PyJIT_Free(_PyExecutorObject *executor)
}
}
// Free shim memory allocated with compile_shim
// Does nothing when no shim size is recorded.  Otherwise the entry pointer is
// reset to _Py_LazyJitShim and the size to 0 before the memory is released; a
// failing jit_free is reported as an unraisable exception.
void
_PyJIT_Fini(void)
{
PyMutex_Lock(&lazy_jit_mutex);
unsigned char *memory = (unsigned char *)_Py_jit_entry;
size_t size = _Py_jit_shim_size;
if (size) {
_Py_jit_entry = _Py_LazyJitShim;
_Py_jit_shim_size = 0;
if (jit_free(memory, size)) {
PyErr_FormatUnraisable("Exception ignored while "
"freeing JIT entry code");
}
}
PyMutex_Unlock(&lazy_jit_mutex);
}
#endif // _Py_JIT

View file

@ -37,9 +37,6 @@
#include "pycore_uniqueid.h" // _PyObject_FinalizeUniqueIdPool()
#include "pycore_warnings.h" // _PyWarnings_InitState()
#include "pycore_weakref.h" // _PyWeakref_GET_REF()
#ifdef _Py_JIT
#include "pycore_jit.h" // _PyJIT_Fini()
#endif
#if defined(PYMALLOC_USE_HUGEPAGES) && defined(MS_WINDOWS)
#include <Windows.h>
@ -2531,11 +2528,6 @@ _Py_Finalize(_PyRuntimeState *runtime)
finalize_interp_clear(tstate);
#ifdef _Py_JIT
/* Free JIT shim memory */
_PyJIT_Fini();
#endif
#ifdef Py_TRACE_REFS
/* Display addresses (& refcnts) of all objects still alive.
* An address can be used to find the repr of the object, printed

View file

@ -489,11 +489,6 @@ free_interpreter(PyInterpreterState *interp)
static inline int check_interpreter_whence(long);
#endif
/* Local extern declaration of the lazy JIT entry function; it takes the
 * executor, frame, stack pointer and thread state. */
extern _Py_CODEUNIT *
_Py_LazyJitShim(
struct _PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
);
/* Get the interpreter state to a minimal consistent state.
Further init happens in pylifecycle.c before it can be used.
All fields not initialized here are expected to be zeroed out,

View file

@ -57,6 +57,12 @@ class _Target(typing.Generic[_S, _R]):
known_symbols: dict[str, int] = dataclasses.field(default_factory=dict)
pyconfig_dir: pathlib.Path = pathlib.Path.cwd().resolve()
# Default compiler arguments for this target: a fresh copy of self.args, so
# callers can extend the result without mutating the target.
def _compile_args(self) -> list[str]:
return list(self.args)
# Extra compiler arguments used only when building the shim object; the base
# implementation adds none.
def _shim_compile_args(self) -> list[str]:
return []
def _get_nop(self) -> bytes:
if re.fullmatch(r"aarch64-.*", self.triple):
nop = b"\x1f\x20\x03\xd5"
@ -139,12 +145,8 @@ def _handle_relocation(
) -> _stencils.Hole:
raise NotImplementedError(type(self))
async def _compile(
self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
) -> _stencils.StencilGroup:
s = tempdir / f"{opname}.s"
o = tempdir / f"{opname}.o"
args_s = [
def _base_clang_args(self, opname: str, tempdir: pathlib.Path) -> list[str]:
return [
f"--target={self.triple}",
"-DPy_BUILD_CORE_MODULE",
"-D_DEBUG" if self.debug else "-DNDEBUG",
@ -167,29 +169,38 @@ async def _compile(
# generates better code than -O2 (and -O2 usually generates better
# code than -O3). As a nice benefit, it uses less memory too:
"-Os",
"-S",
# Shorten full absolute file paths in the generated code (like the
# __FILE__ macro and assert failure messages) for reproducibility:
f"-ffile-prefix-map={CPYTHON}=.",
f"-ffile-prefix-map={tempdir}=.",
# This debug info isn't necessary, and bloats out the JIT'ed code.
# We *may* be able to re-enable this, process it, and JIT it for a
# nicer debugging experience... but that needs a lot more research:
"-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch:
"-fno-builtin",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
"-std=c11",
]
async def _build_stencil_group(
self, opname: str, c: pathlib.Path, tempdir: pathlib.Path
) -> _stencils.StencilGroup:
s = tempdir / f"{opname}.s"
o = tempdir / f"{opname}.o"
args_s = self._base_clang_args(opname, tempdir)
args_s += [
"-S",
# Stencils do not need unwind info, and the optimizer does not
# preserve .cfi_* directives correctly. On Darwin,
# -fno-asynchronous-unwind-tables alone still leaves synchronous
# unwind directives in the assembly, so disable both forms here.
"-fno-unwind-tables",
"-fno-asynchronous-unwind-tables",
"-o",
f"{s}",
f"{c}",
]
is_shim = opname == "shim"
if self.frame_pointers:
frame_pointer = "all" if is_shim else "reserved"
args_s += ["-Xclang", f"-mframe-pointer={frame_pointer}"]
args_s += self.args
args_s += ["-Xclang", "-mframe-pointer=reserved"]
args_s += self._compile_args()
# Allow user-provided CFLAGS to override any defaults
args_s += shlex.split(self.cflags)
await _llvm.run(
@ -199,14 +210,13 @@ async def _compile(
llvm_version=self.llvm_version,
llvm_tools_install_dir=self.llvm_tools_install_dir,
)
if not is_shim:
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
frame_pointers=self.frame_pointers,
).run()
self.optimizer(
s,
label_prefix=self.label_prefix,
symbol_prefix=self.symbol_prefix,
re_global=self.re_global,
frame_pointers=self.frame_pointers,
).run()
args_o = [f"--target={self.triple}", "-c", "-o", f"{o}", f"{s}"]
await _llvm.run(
"clang",
@ -217,6 +227,30 @@ async def _compile(
)
return await self._parse(o)
# Compile TOOLS_JIT / 'shim.c' with clang directly into the object file at
# `output`.  Argument order: base clang args, shim-specific args, "-c" plus
# unwind tables, optional frame-pointer flag, target args, then the split
# self.cflags, and finally the output and input paths.
async def _build_shim_object(self, output: pathlib.Path) -> None:
# The temporary directory is only handed to _base_clang_args; the object
# itself is written to `output`.
with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve()
args_o = self._base_clang_args("shim", work)
args_o += self._shim_compile_args()
args_o += [
"-c",
# The linked shim is a real function in the final binary, so
# keep unwind info for debuggers and stack walkers.
"-fasynchronous-unwind-tables",
]
if self.frame_pointers:
args_o += ["-Xclang", "-mframe-pointer=all"]
args_o += self._compile_args()
# self.cflags is appended after the defaults (presumably so that user
# flags take precedence; confirm against clang's last-flag-wins rules).
args_o += shlex.split(self.cflags)
args_o += ["-o", f"{output}", f"{TOOLS_JIT / 'shim.c'}"]
await _llvm.run(
"clang",
args_o,
echo=self.verbose,
llvm_version=self.llvm_version,
llvm_tools_install_dir=self.llvm_tools_install_dir,
)
async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
cases_and_opnames = sorted(
@ -231,8 +265,6 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
with tempfile.TemporaryDirectory() as tempdir:
work = pathlib.Path(tempdir).resolve()
async with asyncio.TaskGroup() as group:
coro = self._compile("shim", TOOLS_JIT / "shim.c", work)
tasks.append(group.create_task(coro, name="shim"))
template = TOOLS_JIT_TEMPLATE_C.read_text()
for case, opname in cases_and_opnames:
# Write out a copy of the template with *only* this case
@ -242,7 +274,7 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
# all of the other cases):
c = work / f"{opname}.c"
c.write_text(template.replace("CASE", case))
coro = self._compile(opname, c, work)
coro = self._build_stencil_group(opname, c, work)
tasks.append(group.create_task(coro, name=opname))
stencil_groups = {task.get_name(): task.result() for task in tasks}
for stencil_group in stencil_groups.values():
@ -256,8 +288,9 @@ def build(
comment: str = "",
force: bool = False,
jit_stencils: pathlib.Path,
jit_shim_object: pathlib.Path,
) -> None:
"""Build jit_stencils.h in the given directory."""
"""Build jit_stencils.h and the shim object in the given directory."""
jit_stencils.parent.mkdir(parents=True, exist_ok=True)
if not self.stable:
warning = f"JIT support for {self.triple} is still experimental!"
@ -271,8 +304,10 @@ def build(
not force
and jit_stencils.exists()
and jit_stencils.read_text().startswith(digest)
and jit_shim_object.exists()
):
return
ASYNCIO_RUNNER.run(self._build_shim_object(jit_shim_object))
stencil_groups = ASYNCIO_RUNNER.run(self._build_stencils())
jit_stencils_new = jit_stencils.parent / "jit_stencils.h.new"
try:
@ -296,6 +331,13 @@ def build(
class _COFF(
_Target[_schema.COFFSection, _schema.COFFRelocation]
): # pylint: disable = too-few-public-methods
# COFF override: undefine Py_BUILD_CORE_MODULE and define
# Py_BUILD_CORE_BUILTIN when compiling the shim object.
def _shim_compile_args(self) -> list[str]:
# The linked shim is part of pythoncore, not a shared extension.
# On Windows, Py_BUILD_CORE_MODULE makes public APIs import from
# pythonXY.lib, which creates a self-dependency when linking
# pythoncore.dll. Build the shim with builtin/core semantics.
return ["-UPy_BUILD_CORE_MODULE", "-DPy_BUILD_CORE_BUILTIN"]
def _handle_section(
self, section: _schema.COFFSection, group: _stencils.StencilGroup
) -> None:
@ -396,6 +438,10 @@ class _COFF64(_COFF):
symbol_prefix = ""
re_global = re.compile(r'\s*\.def\s+(?P<label>[\w."$?@]+);')
# COFF64 override: put the -fms-runtime-lib flag first (dll_dbg when
# self.debug is set, dll otherwise), followed by the target's own args.
def _compile_args(self) -> list[str]:
runtime = "-fms-runtime-lib=dll_dbg" if self.debug else "-fms-runtime-lib=dll"
return [runtime, *self.args]
class _ELF(
_Target[_schema.ELFSection, _schema.ELFRelocation]
@ -607,9 +653,8 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)"
args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer)
target = _COFF64(host, condition, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)"
@ -636,9 +681,8 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"x86_64-pc-windows-msvc", host):
host = "x86_64-pc-windows-msvc"
condition = "defined(_M_X64)"
args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerX86
target = _COFF64(host, condition, args=args, optimizer=optimizer)
target = _COFF64(host, condition, optimizer=optimizer)
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)"

View file

@ -22,12 +22,8 @@ def _dump_footer(
yield " symbol_mask got_mask;"
yield "} StencilGroup;"
yield ""
yield f"static const StencilGroup shim = {groups['shim'].as_c('shim')};"
yield ""
yield "static const StencilGroup stencil_groups[MAX_UOP_REGS_ID + 1] = {"
for opname, group in sorted(groups.items()):
if opname == "shim":
continue
yield f" [{opname}] = {group.as_c(opname)},"
yield "};"
yield ""

View file

@ -61,6 +61,7 @@
comment=comment,
force=args.force,
jit_stencils=args.output_dir / f"jit_stencils-{target.triple}.h",
jit_shim_object=args.output_dir / f"jit_shim-{target.triple}.o",
)
jit_stencils_h = args.output_dir / "jit_stencils.h"
lines = [f"// {comment}\n"]

View file

@ -7,7 +7,7 @@
#include "jit.h"
_Py_CODEUNIT *
_JIT_ENTRY(
_PyJIT(
_PyExecutorObject *exec, _PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate
) {
// Note that this is *not* a tail call

68
configure generated vendored
View file

@ -644,6 +644,8 @@ ac_includes_default="\
ac_header_c_list=
ac_subst_vars='LTLIBOBJS
MODULE_BLOCK
JIT_SHIM_BUILD_O
JIT_SHIM_O
JIT_STENCILS_H
MODULE_XXLIMITED_35_FALSE
MODULE_XXLIMITED_35_TRUE
@ -34703,38 +34705,56 @@ printf "%s\n" "$py_cv_module_xxlimited_35" >&6; }
# Determine JIT stencils header files based on target platform
JIT_STENCILS_H=""
if test "x$enable_experimental_jit" = xno
JIT_SHIM_O=""
JIT_SHIM_BUILD_O=""
if ${jit_flags:+false} :
then :
else case e in #(
e) case "$host" in
aarch64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
;;
x86_64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
;;
aarch64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
;;
i686-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
;;
x86_64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
;;
aarch64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
;;
x86_64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
;;
esac ;;
e) if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h jit_stencils-x86_64-apple-darwin.h"
JIT_SHIM_O="jit_shim-universal2-apple-darwin.o"
JIT_SHIM_BUILD_O="jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o"
else
case "$host" in
aarch64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
JIT_SHIM_O="jit_shim-aarch64-apple-darwin.o"
;;
x86_64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
JIT_SHIM_O="jit_shim-x86_64-apple-darwin.o"
;;
aarch64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-aarch64-pc-windows-msvc.o"
;;
i686-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-i686-pc-windows-msvc.o"
;;
x86_64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-x86_64-pc-windows-msvc.o"
;;
aarch64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
JIT_SHIM_O="jit_shim-aarch64-unknown-linux-gnu.o"
;;
x86_64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
JIT_SHIM_O="jit_shim-x86_64-unknown-linux-gnu.o"
;;
esac
JIT_SHIM_BUILD_O="$JIT_SHIM_O"
fi ;;
esac
fi
# substitute multiline block, must come after last PY_STDLIB_MOD()

View file

@ -8384,33 +8384,52 @@ PY_STDLIB_MOD([xxlimited_35], [test "$TEST_MODULES" = yes], [test "$ac_cv_func_d
# Determine JIT stencils header files based on target platform
# JIT_STENCILS_H:   stencil header name or names for the selected target.
# JIT_SHIM_O:       shim object name for the selected target.
# JIT_SHIM_BUILD_O: shim object or objects listed as build products. It equals
#                   JIT_SHIM_O except for universal2, where the two per-arch
#                   objects are listed and JIT_SHIM_O names the universal2 one.
# All three stay empty when no branch below assigns them.
JIT_STENCILS_H=""
AS_VAR_IF([enable_experimental_jit], [no],
JIT_SHIM_O=""
JIT_SHIM_BUILD_O=""
AS_VAR_IF([jit_flags],
[],
[case "$host" in
aarch64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
;;
x86_64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
;;
aarch64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
;;
i686-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
;;
x86_64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
;;
aarch64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
;;
x86_64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
;;
esac])
[],
[if test "${enable_universalsdk}" && test "$UNIVERSAL_ARCHS" = "universal2"; then
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h jit_stencils-x86_64-apple-darwin.h"
JIT_SHIM_O="jit_shim-universal2-apple-darwin.o"
JIT_SHIM_BUILD_O="jit_shim-aarch64-apple-darwin.o jit_shim-x86_64-apple-darwin.o"
else
case "$host" in
aarch64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-aarch64-apple-darwin.h"
JIT_SHIM_O="jit_shim-aarch64-apple-darwin.o"
;;
x86_64-apple-darwin*)
JIT_STENCILS_H="jit_stencils-x86_64-apple-darwin.h"
JIT_SHIM_O="jit_shim-x86_64-apple-darwin.o"
;;
aarch64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-aarch64-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-aarch64-pc-windows-msvc.o"
;;
i686-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-i686-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-i686-pc-windows-msvc.o"
;;
x86_64-pc-windows-msvc)
JIT_STENCILS_H="jit_stencils-x86_64-pc-windows-msvc.h"
JIT_SHIM_O="jit_shim-x86_64-pc-windows-msvc.o"
;;
aarch64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-aarch64-unknown-linux-gnu.h"
JIT_SHIM_O="jit_shim-aarch64-unknown-linux-gnu.o"
;;
x86_64-*-linux-gnu)
JIT_STENCILS_H="jit_stencils-x86_64-unknown-linux-gnu.h"
JIT_SHIM_O="jit_shim-x86_64-unknown-linux-gnu.o"
;;
esac
JIT_SHIM_BUILD_O="$JIT_SHIM_O"
fi])
# Export the three values for substitution into generated files.
AC_SUBST([JIT_STENCILS_H])
AC_SUBST([JIT_SHIM_O])
AC_SUBST([JIT_SHIM_BUILD_O])
# substitute multiline block, must come after last PY_STDLIB_MOD()
AC_SUBST([MODULE_BLOCK])