gh-136459: Add perf trampoline support for macOS (#136461)

This commit is contained in:
Nazım Can Altınova 2025-07-22 17:47:24 +02:00 committed by GitHub
parent b6d3242244
commit a667800558
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 351 additions and 27 deletions

View file

@ -66,7 +66,9 @@
#ifdef PY_HAVE_PERF_TRAMPOLINE
/* Standard library includes for perf jitdump implementation */
#include <elf.h> // ELF architecture constants
#if defined(__linux__)
# include <elf.h> // ELF architecture constants
#endif
#include <fcntl.h> // File control operations
#include <stdio.h> // Standard I/O operations
#include <stdlib.h> // Standard library functions
@ -74,7 +76,9 @@
#include <sys/types.h> // System data types
#include <unistd.h> // System calls (sysconf, getpid)
#include <sys/time.h> // Time functions (gettimeofday)
#include <sys/syscall.h> // System call interface
#if defined(__linux__)
# include <sys/syscall.h> // System call interface
#endif
// =============================================================================
// CONSTANTS AND CONFIGURATION
@ -101,6 +105,22 @@
* based on the actual unwind information requirements.
*/
/*
 * Fallback definitions for the ELF architecture constants.
 *
 * These constants are defined inside <elf.h>, which we can't use outside of
 * Linux, so on every other platform the value is supplied here instead.
 * Only the single constant matching the architecture being compiled for is
 * defined: the #if/#elif chain stops at the first match, and no constant is
 * defined at all when none of the listed architectures is detected.
 *
 * NOTE(review): the numeric values are presumably the e_machine codes from
 * <elf.h>; verify against that header before changing any of them.
 */
#if !defined(__linux__)
# if defined(__i386__) || defined(_M_IX86)
# define EM_386 3
# elif defined(__arm__) || defined(_M_ARM)
# define EM_ARM 40
# elif defined(__x86_64__) || defined(_M_X64)
# define EM_X86_64 62
# elif defined(__aarch64__)
# define EM_AARCH64 183
# elif defined(__riscv)
# define EM_RISCV 243
# endif
#endif /* !defined(__linux__) */
/* Convenient access to the global trampoline API state */
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
@ -194,7 +214,7 @@ struct BaseEvent {
typedef struct {
struct BaseEvent base; // Common event header
uint32_t process_id; // Process ID where code was generated
uint32_t thread_id; // Thread ID where code was generated
uint64_t thread_id; // Thread ID where code was generated
uint64_t vma; // Virtual memory address where code is loaded
uint64_t code_address; // Address of the actual machine code
uint64_t code_size; // Size of the machine code in bytes
@ -1035,6 +1055,10 @@ static void* perf_map_jit_init(void) {
return NULL; // Failed to get page size
}
#if defined(__APPLE__)
// On macOS, samply uses a preload to find jitdumps and this mmap can be slow,
// so skip the mapping and leave the buffer pointer NULL.
perf_jit_map_state.mapped_buffer = NULL;
#else
/*
* Map the first page of the jitdump file
*
@ -1057,6 +1081,7 @@ static void* perf_map_jit_init(void) {
close(fd);
return NULL; // Memory mapping failed
}
#endif
perf_jit_map_state.mapped_size = page_size;
@ -1263,7 +1288,11 @@ static void perf_map_jit_write_entry(void *state, const void *code_addr,
ev.base.size = sizeof(ev) + (name_length+1) + size;
ev.base.time_stamp = get_current_monotonic_ticks();
ev.process_id = getpid();
#if defined(__APPLE__)
pthread_threadid_np(NULL, &ev.thread_id);
#else
ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call
#endif
ev.vma = base; // Virtual memory address
ev.code_address = base; // Same as VMA for our use case
ev.code_size = size;