gh-150723: Fix perf jitdump files on macOS (#150728)

The perf jitdump format defines the thread id field of the JR_CODE_LOAD
record as a 32-bit value, but on macOS it was declared as a uint64_t
(since pthread_threadid_np() returns a uint64_t). Those extra 8 bytes
plus alignment padding shifted every following field, so parsers reading
the file by the spec misread code_size as the code address and failed to
resolve any Python frames.

Declare thread_id as uint32_t on all platforms and truncate the macOS
thread id when writing the record. The value is only informational.
Symbols are resolved by address, and not thread ids so truncation is
safe here.

* Use mach_absolute_time for macOS jitdump timestamps

On macOS the jitdump file is consumed by profilers such as samply, which
timestamp their samples using mach_absolute_time(). The jitdump events were
stamped with clock_gettime(CLOCK_MONOTONIC), a different clock domain that
keeps advancing while the system is asleep, so the JIT code mappings could be
off by days relative to the samples and no Python frame would resolve. Stamp
jitdump events with mach_absolute_time() on macOS so they share the sampler's
clock domain. Linux continues to use CLOCK_MONOTONIC to stay aligned with perf.

Exercise the -Xperf_jit (jitdump) backend through samply and assert that
Python frames resolve, exercising the binary jitdump path end to end.
Skipped when samply is not installed.
This commit is contained in:
Nazım Can Altınova 2026-06-03 02:15:34 +02:00 committed by GitHub
parent 29805f00a1
commit 494f2e3c92
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 56 additions and 5 deletions

View file

@ -82,6 +82,9 @@
#if defined(__linux__)
# include <sys/syscall.h> // System call interface
#endif
#if defined(__APPLE__)
# include <mach/mach_time.h> // mach_absolute_time, mach_timebase_info
#endif
// =============================================================================
// CONSTANTS AND CONFIGURATION
@ -217,11 +220,7 @@ struct BaseEvent {
typedef struct {
struct BaseEvent base; // Common event header
uint32_t process_id; // Process ID where code was generated
#if defined(__APPLE__)
uint64_t thread_id; // Thread ID where code was generated
#else
uint32_t thread_id; // Thread ID where code was generated
#endif
uint64_t vma; // Virtual memory address where code is loaded
uint64_t code_address; // Address of the actual machine code
uint64_t code_size; // Size of the machine code in bytes
@ -295,7 +294,9 @@ static PerfMapJitState perf_jit_map_state;
// =============================================================================
/* Time conversion constant */
#if !defined(__APPLE__)
static const intptr_t nanoseconds_per_second = 1000000000;
#endif
/*
* Get current monotonic time in nanoseconds
@ -307,6 +308,18 @@ static const intptr_t nanoseconds_per_second = 1000000000;
* Returns: Current monotonic time in nanoseconds since an arbitrary epoch
*/
static int64_t get_current_monotonic_ticks(void) {
#if defined(__APPLE__)
// On macOS the jitdump file is consumed by profilers (such as samply) that
// timestamp their samples using mach_absolute_time(). The jitdump event
// timestamps must use the same clock domain, otherwise the JIT code
// mappings cannot be lined up with the samples.
static mach_timebase_info_data_t timebase = {0, 0};
if (timebase.denom == 0) {
(void)mach_timebase_info(&timebase);
}
uint64_t ticks = mach_absolute_time();
return (int64_t)(ticks * timebase.numer / timebase.denom);
#else
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
Py_UNREACHABLE(); // Should never fail on supported systems
@ -318,6 +331,7 @@ static int64_t get_current_monotonic_ticks(void) {
result *= nanoseconds_per_second;
result += ts.tv_nsec;
return result;
#endif
}
/*
@ -652,7 +666,12 @@ static void perf_map_jit_write_entry_with_name(
ev.base.time_stamp = get_current_monotonic_ticks();
ev.process_id = getpid();
#if defined(__APPLE__)
pthread_threadid_np(NULL, &ev.thread_id);
// The jitdump format defines the thread id field as a 32-bit value, but
// pthread_threadid_np() returns a 64-bit id. Truncate it to 32 bits to
// keep the record layout identical to other platforms.
uint64_t thread_id = 0;
pthread_threadid_np(NULL, &thread_id);
ev.thread_id = (uint32_t)thread_id;
#else
ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call
#endif