mirror of
https://github.com/python/cpython.git
synced 2026-06-05 01:10:53 +00:00
gh-150723: Fix perf jitdump files on macOS (#150728)
The perf jitdump format defines the thread id field of the JR_CODE_LOAD record as a 32-bit value, but on macOS it was declared as a uint64_t (since pthread_threadid_np() returns a uint64_t). Those extra 8 bytes plus alignment padding shifted every following field, so parsers reading the file by the spec misread code_size as the code address and failed to resolve any Python frames. Declare thread_id as uint32_t on all platforms and truncate the macOS thread id when writing the record. The value is only informational: symbols are resolved by address, not by thread id, so truncation is safe here. * Use mach_absolute_time for macOS jitdump timestamps On macOS the jitdump file is consumed by profilers such as samply, which timestamp their samples using mach_absolute_time(). The jitdump events were stamped with clock_gettime(CLOCK_MONOTONIC), a different clock domain that keeps advancing while the system is asleep, so the JIT code mappings could be off by days relative to the samples and no Python frame would resolve. Stamp jitdump events with mach_absolute_time() on macOS so they share the sampler's clock domain. Linux continues to use CLOCK_MONOTONIC to stay aligned with perf. Add a test that runs the -Xperf_jit (jitdump) backend through samply and asserts that Python frames resolve, exercising the binary jitdump path end to end. The test is skipped when samply is not installed.
This commit is contained in:
parent
29805f00a1
commit
494f2e3c92
4 changed files with 56 additions and 5 deletions
|
|
@ -82,6 +82,9 @@
|
|||
#if defined(__linux__)
|
||||
# include <sys/syscall.h> // System call interface
|
||||
#endif
|
||||
#if defined(__APPLE__)
|
||||
# include <mach/mach_time.h> // mach_absolute_time, mach_timebase_info
|
||||
#endif
|
||||
|
||||
// =============================================================================
|
||||
// CONSTANTS AND CONFIGURATION
|
||||
|
|
@ -217,11 +220,7 @@ struct BaseEvent {
|
|||
typedef struct {
|
||||
struct BaseEvent base; // Common event header
|
||||
uint32_t process_id; // Process ID where code was generated
|
||||
#if defined(__APPLE__)
|
||||
uint64_t thread_id; // Thread ID where code was generated
|
||||
#else
|
||||
uint32_t thread_id; // Thread ID where code was generated
|
||||
#endif
|
||||
uint64_t vma; // Virtual memory address where code is loaded
|
||||
uint64_t code_address; // Address of the actual machine code
|
||||
uint64_t code_size; // Size of the machine code in bytes
|
||||
|
|
@ -295,7 +294,9 @@ static PerfMapJitState perf_jit_map_state;
|
|||
// =============================================================================
|
||||
|
||||
/* Time conversion constant */
|
||||
#if !defined(__APPLE__)
|
||||
static const intptr_t nanoseconds_per_second = 1000000000;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Get current monotonic time in nanoseconds
|
||||
|
|
@ -307,6 +308,18 @@ static const intptr_t nanoseconds_per_second = 1000000000;
|
|||
* Returns: Current monotonic time in nanoseconds since an arbitrary epoch
|
||||
*/
|
||||
static int64_t get_current_monotonic_ticks(void) {
|
||||
#if defined(__APPLE__)
|
||||
// On macOS the jitdump file is consumed by profilers (such as samply) that
|
||||
// timestamp their samples using mach_absolute_time(). The jitdump event
|
||||
// timestamps must use the same clock domain, otherwise the JIT code
|
||||
// mappings cannot be lined up with the samples.
|
||||
static mach_timebase_info_data_t timebase = {0, 0};
|
||||
if (timebase.denom == 0) {
|
||||
(void)mach_timebase_info(&timebase);
|
||||
}
|
||||
uint64_t ticks = mach_absolute_time();
|
||||
return (int64_t)(ticks * timebase.numer / timebase.denom);
|
||||
#else
|
||||
struct timespec ts;
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
|
||||
Py_UNREACHABLE(); // Should never fail on supported systems
|
||||
|
|
@ -318,6 +331,7 @@ static int64_t get_current_monotonic_ticks(void) {
|
|||
result *= nanoseconds_per_second;
|
||||
result += ts.tv_nsec;
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -652,7 +666,12 @@ static void perf_map_jit_write_entry_with_name(
|
|||
ev.base.time_stamp = get_current_monotonic_ticks();
|
||||
ev.process_id = getpid();
|
||||
#if defined(__APPLE__)
|
||||
pthread_threadid_np(NULL, &ev.thread_id);
|
||||
// The jitdump format defines the thread id field as a 32-bit value, but
|
||||
// pthread_threadid_np() returns a 64-bit id. Truncate it to 32 bits to
|
||||
// keep the record layout identical to other platforms.
|
||||
uint64_t thread_id = 0;
|
||||
pthread_threadid_np(NULL, &thread_id);
|
||||
ev.thread_id = (uint32_t)thread_id;
|
||||
#else
|
||||
ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue