GH-136895: Update JIT builds to use LLVM 20 (#140329)

Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
This commit is contained in:
Savannah Ostrowski 2025-11-03 10:01:44 -08:00 committed by GitHub
parent b373d3494c
commit 4e2ff4ac4c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 151 additions and 50 deletions

View file

@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
}
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
#include "jit_stencils.h"
#if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
#elif defined(__x86_64__) && defined(__APPLE__)
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
// range.
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
#define DATA_ALIGN 8
#else
#define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1
#endif
// Get the trampoline memory location for a given symbol ordinal.
static unsigned char *
get_trampoline_slot(int ordinal, jit_state *state)
{
const uint32_t symbol_mask = 1 << (ordinal % 32);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);
// Count the number of set bits in the trampoline mask lower than ordinal
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
return trampoline;
}
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map.
void
@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
return;
}
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
const uint32_t symbol_mask = 1 << (ordinal % 32);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);
// Count the number of set bits in the trampoline mask lower than ordinal,
// this gives the index into the array of trampolines.
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
// Out of range - need a trampoline
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
/* Generate the trampoline
@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
patch_aarch64_26r(location, (uintptr_t)p);
}
// Generate and patch x86_64 trampolines.
void
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
{
uint64_t value = (uintptr_t)symbols_map[ordinal];
int64_t range = (int64_t)value - 4 - (int64_t)location;
// If we are in range of 32 signed bits, we can patch directly
if (range >= -(1LL << 31) && range < (1LL << 31)) {
patch_32r(location, value - 4);
return;
}
// Out of range - need a trampoline
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
/* Generate the trampoline (14 bytes, padded to 16):
0: ff 25 00 00 00 00 jmp *(%rip)
6: XX XX XX XX XX XX XX XX (64-bit target address)
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
*/
trampoline[0] = 0xFF;
trampoline[1] = 0x25;
memset(trampoline + 2, 0, 4);
memcpy(trampoline + 6, &value, 8);
// Patch the call site to call the trampoline instead
patch_32r(location, (uintptr_t)trampoline - 4);
}
static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{