mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
GH-136895: Update JIT builds to use LLVM 20 (#140329)
Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
This commit is contained in:
parent
b373d3494c
commit
4e2ff4ac4c
9 changed files with 151 additions and 50 deletions
72
Python/jit.c
72
Python/jit.c
|
|
@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
|
|||
}
|
||||
|
||||
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
|
||||
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
|
||||
|
||||
#include "jit_stencils.h"
|
||||
|
||||
// Per-target trampoline layout constants.
// TRAMPOLINE_SIZE is the size in bytes of one trampoline slot;
// get_trampoline_slot() uses it as the stride when indexing into the
// trampoline memory. On targets that take the #else branch it is 0, so slots
// occupy no space there.
// NOTE(review): DATA_ALIGN is not used in the code visible here; presumably
// it is the alignment required for the JIT data section - confirm at its
// point of use.
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define TRAMPOLINE_SIZE 16
|
||||
#define DATA_ALIGN 8
|
||||
#elif defined(__x86_64__) && defined(__APPLE__)
|
||||
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
|
||||
// range.
|
||||
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
|
||||
#define DATA_ALIGN 8
|
||||
#else
|
||||
#define TRAMPOLINE_SIZE 0
|
||||
#define DATA_ALIGN 1
|
||||
|
||||
#endif
|
||||
|
||||
// Get the trampoline memory location for a given symbol ordinal.
|
||||
static unsigned char *
|
||||
get_trampoline_slot(int ordinal, jit_state *state)
|
||||
{
|
||||
const uint32_t symbol_mask = 1 << (ordinal % 32);
|
||||
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
|
||||
assert(symbol_mask & trampoline_mask);
|
||||
|
||||
// Count the number of set bits in the trampoline mask lower than ordinal
|
||||
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
|
||||
for (int i = 0; i < ordinal / 32; i++) {
|
||||
index += _Py_popcount32(state->trampolines.mask[i]);
|
||||
}
|
||||
|
||||
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
|
||||
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
|
||||
// in the jit_stencils.h in the symbols_map.
|
||||
void
|
||||
|
|
@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
|||
return;
|
||||
}
|
||||
|
||||
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
|
||||
const uint32_t symbol_mask = 1 << (ordinal % 32);
|
||||
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
|
||||
assert(symbol_mask & trampoline_mask);
|
||||
|
||||
// Count the number of set bits in the trampoline mask lower than ordinal,
|
||||
// this gives the index into the array of trampolines.
|
||||
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
|
||||
for (int i = 0; i < ordinal / 32; i++) {
|
||||
index += _Py_popcount32(state->trampolines.mask[i]);
|
||||
}
|
||||
|
||||
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
|
||||
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
|
||||
// Out of range - need a trampoline
|
||||
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
|
||||
|
||||
|
||||
/* Generate the trampoline
|
||||
|
|
@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
|||
patch_aarch64_26r(location, (uintptr_t)p);
|
||||
}
|
||||
|
||||
// Generate and patch x86_64 trampolines.
|
||||
void
|
||||
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
||||
{
|
||||
uint64_t value = (uintptr_t)symbols_map[ordinal];
|
||||
int64_t range = (int64_t)value - 4 - (int64_t)location;
|
||||
|
||||
// If we are in range of 32 signed bits, we can patch directly
|
||||
if (range >= -(1LL << 31) && range < (1LL << 31)) {
|
||||
patch_32r(location, value - 4);
|
||||
return;
|
||||
}
|
||||
|
||||
// Out of range - need a trampoline
|
||||
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
|
||||
|
||||
/* Generate the trampoline (14 bytes, padded to 16):
|
||||
0: ff 25 00 00 00 00 jmp *(%rip)
|
||||
6: XX XX XX XX XX XX XX XX (64-bit target address)
|
||||
|
||||
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
|
||||
*/
|
||||
trampoline[0] = 0xFF;
|
||||
trampoline[1] = 0x25;
|
||||
memset(trampoline + 2, 0, 4);
|
||||
memcpy(trampoline + 6, &value, 8);
|
||||
|
||||
// Patch the call site to call the trampoline instead
|
||||
patch_32r(location, (uintptr_t)trampoline - 4);
|
||||
}
|
||||
|
||||
static void
|
||||
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue