| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Python Perf Trampoline Support - JIT Dump Implementation | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This file implements the perf jitdump API for Python's performance profiling | 
					
						
							|  |  |  |  * integration. It allows perf (Linux performance analysis tool) to understand | 
					
						
							|  |  |  |  * and profile dynamically generated Python bytecode by creating JIT dump files | 
					
						
							|  |  |  |  * that perf can inject into its analysis. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * IMPORTANT: This file exports specific callback functions that are part of | 
					
						
							|  |  |  |  * Python's internal API. Do not modify the function signatures or behavior | 
					
						
							|  |  |  |  * of exported functions without coordinating with the Python core team. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Usually the binary and libraries are mapped in separate region like below: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *   address -> | 
					
						
							|  |  |  |  *    --+---------------------+--//--+---------------------+--
 | 
					
						
							|  |  |  |  *      | .text | .data | ... |      | .text | .data | ... | | 
					
						
							|  |  |  |  *    --+---------------------+--//--+---------------------+--
 | 
					
						
							|  |  |  |  *          myprog                      libc.so | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * So it'd be easy and straight-forward to find a mapped binary or library from an | 
					
						
							|  |  |  |  * address. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * But for JIT code, the code arena only cares about the code section. But the | 
					
						
							|  |  |  |  * resulting DSOs (which is generated by perf inject -j) contain ELF headers and | 
					
						
							|  |  |  |  * unwind info too. Then it'd generate following address space with synthesized | 
					
						
							|  |  |  |  * MMAP events. Let's say it has a sample between address B and C. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *                                                sample | 
					
						
							|  |  |  |  *                                                  | | 
					
						
							|  |  |  |  *   address ->                         A       B   v   C | 
					
						
							|  |  |  |  *   --------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-1.so           | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-2.so                   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *     ... | 
					
						
							|  |  |  |  *   --------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see | 
					
						
							|  |  |  |  * the unwind info. If it maps both .text section and unwind sections, the sample | 
					
						
							|  |  |  |  * could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing | 
					
						
							|  |  |  |  * which one is right. So to make perf happy we have non-overlapping ranges for each | 
					
						
							|  |  |  |  * DSO: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  *   address -> | 
					
						
							|  |  |  |  *   ------------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-1.so                         | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *   /tmp/jitted-PID-2.so                                               | (headers) | .text | unwind info | | 
					
						
							|  |  |  |  *     ... | 
					
						
							|  |  |  |  *   ------------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * As the trampolines are constant, we add a constant padding but in general the padding needs to have the | 
					
						
							|  |  |  |  * size of the unwind info rounded to 16 bytes. In general, for our trampolines this is 0x50 | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | #include "pycore_ceval.h"         // _PyPerf_Callbacks
 | 
					
						
							|  |  |  | #include "pycore_frame.h"
 | 
					
						
							|  |  |  | #include "pycore_interp.h"
 | 
					
						
							| 
									
										
										
										
											2025-03-17 12:32:43 +01:00
										 |  |  | #include "pycore_runtime.h"       // _PyRuntime
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Standard library includes for perf jitdump implementation */ | 
					
						
							|  |  |  | #include <elf.h>                  // ELF architecture constants
 | 
					
						
							|  |  |  | #include <fcntl.h>                // File control operations
 | 
					
						
							|  |  |  | #include <stdio.h>                // Standard I/O operations
 | 
					
						
							|  |  |  | #include <stdlib.h>               // Standard library functions
 | 
					
						
							|  |  |  | #include <sys/mman.h>             // Memory mapping functions (mmap)
 | 
					
						
							|  |  |  | #include <sys/types.h>            // System data types
 | 
					
						
							|  |  |  | #include <unistd.h>               // System calls (sysconf, getpid)
 | 
					
						
							|  |  |  | #include <sys/time.h>             // Time functions (gettimeofday)
 | 
					
						
							|  |  |  | #include <sys/syscall.h>          // System call interface
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                           CONSTANTS AND CONFIGURATION
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |  * Memory layout considerations for perf jitdump: | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Perf expects non-overlapping memory regions for each JIT-compiled function. | 
					
						
							|  |  |  |  * When perf processes the jitdump file, it creates synthetic DSO (Dynamic | 
					
						
							|  |  |  |  * Shared Object) files that contain: | 
					
						
							|  |  |  |  * - ELF headers | 
					
						
							|  |  |  |  * - .text section (actual machine code) | 
					
						
							|  |  |  |  * - Unwind information (for stack traces) | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * To ensure proper address space layout, we add padding between code regions. | 
					
						
							|  |  |  |  * This prevents address conflicts when perf maps the synthesized DSOs. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Memory layout example: | 
					
						
							|  |  |  |  * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] | 
					
						
							|  |  |  |  * /tmp/jitted-PID-1.so:                                       [headers][.text][unwind_info][padding] | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The padding size (0x100) is chosen to accommodate typical unwind info sizes | 
					
						
							|  |  |  |  * while maintaining 16-byte alignment requirements. | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |  */ | 
					
						
							|  |  |  | #define PERF_JIT_CODE_PADDING 0x100
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Convenient access to the global trampoline API state */ | 
					
						
							|  |  |  | #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Type aliases for clarity and portability */ | 
					
						
							|  |  |  | typedef uint64_t uword;                    // Word-sized unsigned integer
 | 
					
						
							|  |  |  | typedef const char* CodeComments;          // Code comment strings
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Memory size constants */ | 
					
						
							|  |  |  | #define MB (1024 * 1024)                   // 1 Megabyte for buffer sizing
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                        ARCHITECTURE-SPECIFIC DEFINITIONS
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Returns the ELF machine architecture constant for the current platform. | 
					
						
							|  |  |  |  * This is required for the jitdump header to correctly identify the target | 
					
						
							|  |  |  |  * architecture for perf processing. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static uint64_t GetElfMachineArchitecture(void) { | 
					
						
							|  |  |  | #if defined(__x86_64__) || defined(_M_X64)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     return EM_X86_64; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | #elif defined(__i386__) || defined(_M_IX86)
 | 
					
						
							|  |  |  |     return EM_386; | 
					
						
							|  |  |  | #elif defined(__aarch64__)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     return EM_AARCH64; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | #elif defined(__arm__) || defined(_M_ARM)
 | 
					
						
							|  |  |  |     return EM_ARM; | 
					
						
							|  |  |  | #elif defined(__riscv)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     return EM_RISCV; | 
					
						
							|  |  |  | #else
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     Py_UNREACHABLE();  // Unsupported architecture - should never reach here
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                           PERF JITDUMP DATA STRUCTURES
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Perf jitdump file format structures | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * These structures define the binary format that perf expects for JIT dump files. | 
					
						
							|  |  |  |  * The format is documented in the Linux perf tools source code and must match | 
					
						
							|  |  |  |  * exactly for proper perf integration. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Jitdump file header - written once at the beginning of each jitdump file | 
					
						
							|  |  |  |  * Contains metadata about the process and jitdump format version | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | typedef struct { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     uint32_t magic;              // Magic number (0x4A695444 = "JiTD")
 | 
					
						
							|  |  |  |     uint32_t version;            // Jitdump format version (currently 1)
 | 
					
						
							|  |  |  |     uint32_t size;               // Size of this header structure
 | 
					
						
							|  |  |  |     uint32_t elf_mach_target;    // Target architecture (from GetElfMachineArchitecture)
 | 
					
						
							|  |  |  |     uint32_t reserved;           // Reserved field (must be 0)
 | 
					
						
							|  |  |  |     uint32_t process_id;         // Process ID of the JIT compiler
 | 
					
						
							|  |  |  |     uint64_t time_stamp;         // Timestamp when jitdump was created
 | 
					
						
							|  |  |  |     uint64_t flags;              // Feature flags (currently unused)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } Header; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Perf event types supported by the jitdump format | 
					
						
							|  |  |  |  * Each event type has a corresponding structure format | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | enum PerfEvent { | 
					
						
							|  |  |  |     PerfLoad = 0,           // Code load event (new JIT function)
 | 
					
						
							|  |  |  |     PerfMove = 1,           // Code move event (function relocated)
 | 
					
						
							|  |  |  |     PerfDebugInfo = 2,      // Debug information event
 | 
					
						
							|  |  |  |     PerfClose = 3,          // JIT session close event
 | 
					
						
							|  |  |  |     PerfUnwindingInfo = 4   // Stack unwinding information event
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Base event structure - common header for all perf events | 
					
						
							|  |  |  |  * Every event in the jitdump file starts with this structure | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | struct BaseEvent { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     uint32_t event;         // Event type (from PerfEvent enum)
 | 
					
						
							|  |  |  |     uint32_t size;          // Total size of this event including payload
 | 
					
						
							|  |  |  |     uint64_t time_stamp;    // Timestamp when event occurred
 | 
					
						
							|  |  |  | }; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Code load event - indicates a new JIT-compiled function is available | 
					
						
							|  |  |  |  * This is the most important event type for Python profiling | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | typedef struct { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     struct BaseEvent base;   // Common event header
 | 
					
						
							|  |  |  |     uint32_t process_id;     // Process ID where code was generated
 | 
					
						
							|  |  |  |     uint32_t thread_id;      // Thread ID where code was generated
 | 
					
						
							|  |  |  |     uint64_t vma;            // Virtual memory address where code is loaded
 | 
					
						
							|  |  |  |     uint64_t code_address;   // Address of the actual machine code
 | 
					
						
							|  |  |  |     uint64_t code_size;      // Size of the machine code in bytes
 | 
					
						
							|  |  |  |     uint64_t code_id;        // Unique identifier for this code region
 | 
					
						
							|  |  |  |     /* Followed by:
 | 
					
						
							|  |  |  |      * - null-terminated function name string | 
					
						
							|  |  |  |      * - raw machine code bytes | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } CodeLoadEvent; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Code unwinding information event - provides DWARF data for stack traces | 
					
						
							|  |  |  |  * Essential for proper stack unwinding during profiling | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | typedef struct { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     struct BaseEvent base;      // Common event header
 | 
					
						
							|  |  |  |     uint64_t unwind_data_size;  // Size of the unwinding data
 | 
					
						
							|  |  |  |     uint64_t eh_frame_hdr_size; // Size of the EH frame header
 | 
					
						
							|  |  |  |     uint64_t mapped_size;       // Total mapped size (with padding)
 | 
					
						
							|  |  |  |     /* Followed by:
 | 
					
						
							|  |  |  |      * - EH frame header | 
					
						
							|  |  |  |      * - DWARF unwinding information | 
					
						
							|  |  |  |      * - Padding to alignment boundary | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } CodeUnwindingInfoEvent; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              GLOBAL STATE MANAGEMENT
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Global state for the perf jitdump implementation | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This structure maintains all the state needed for generating jitdump files. | 
					
						
							|  |  |  |  * It's designed as a singleton since there's typically only one jitdump file | 
					
						
							|  |  |  |  * per Python process. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | typedef struct { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     FILE* perf_map;          // File handle for the jitdump file
 | 
					
						
							|  |  |  |     PyThread_type_lock map_lock;  // Thread synchronization lock
 | 
					
						
							|  |  |  |     void* mapped_buffer;     // Memory-mapped region (signals perf we're active)
 | 
					
						
							|  |  |  |     size_t mapped_size;      // Size of the mapped region
 | 
					
						
							|  |  |  |     int code_id;             // Counter for unique code region identifiers
 | 
					
						
							|  |  |  | } PerfMapJitState; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Global singleton instance */ | 
					
						
							|  |  |  | static PerfMapJitState perf_jit_map_state; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              TIME UTILITIES
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Time conversion constant */ | 
					
						
							|  |  |  | static const intptr_t nanoseconds_per_second = 1000000000; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Get current monotonic time in nanoseconds | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Monotonic time is preferred for event timestamps because it's not affected | 
					
						
							|  |  |  |  * by system clock adjustments. This ensures consistent timing relationships | 
					
						
							|  |  |  |  * between events even if the system clock is changed. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: Current monotonic time in nanoseconds since an arbitrary epoch | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static int64_t get_current_monotonic_ticks(void) { | 
					
						
							|  |  |  |     struct timespec ts; | 
					
						
							|  |  |  |     if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         Py_UNREACHABLE();  // Should never fail on supported systems
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Convert to nanoseconds for maximum precision */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     int64_t result = ts.tv_sec; | 
					
						
							|  |  |  |     result *= nanoseconds_per_second; | 
					
						
							|  |  |  |     result += ts.tv_nsec; | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Get current wall clock time in microseconds | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Used for the jitdump file header timestamp. Unlike monotonic time, | 
					
						
							|  |  |  |  * this represents actual wall clock time that can be correlated with | 
					
						
							|  |  |  |  * other system events. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: Current time in microseconds since Unix epoch | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static int64_t get_current_time_microseconds(void) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     struct timeval tv; | 
					
						
							|  |  |  |     if (gettimeofday(&tv, NULL) < 0) { | 
					
						
							|  |  |  |         Py_UNREACHABLE();  // Should never fail on supported systems
 | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              UTILITY FUNCTIONS
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Round up a value to the next multiple of a given number | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This is essential for maintaining proper alignment requirements in the | 
					
						
							|  |  |  |  * jitdump format. Many structures need to be aligned to specific boundaries | 
					
						
							|  |  |  |  * (typically 8 or 16 bytes) for efficient processing by perf. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   value: The value to round up | 
					
						
							|  |  |  |  *   multiple: The multiple to round up to | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: The smallest value >= input that is a multiple of 'multiple' | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static size_t round_up(int64_t value, int64_t multiple) { | 
					
						
							|  |  |  |     if (multiple == 0) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         return value;  // Avoid division by zero
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int64_t remainder = value % multiple; | 
					
						
							|  |  |  |     if (remainder == 0) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         return value;  // Already aligned
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Calculate how much to add to reach the next multiple */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     int64_t difference = multiple - remainder; | 
					
						
							|  |  |  |     int64_t rounded_up_value = value + difference; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return rounded_up_value; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              FILE I/O UTILITIES
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Write data to the jitdump file with error handling | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This function ensures that all data is written to the file, handling | 
					
						
							|  |  |  |  * partial writes that can occur with large buffers or when the system | 
					
						
							|  |  |  |  * is under load. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   buffer: Pointer to data to write | 
					
						
							|  |  |  |  *   size: Number of bytes to write | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static void perf_map_jit_write_fully(const void* buffer, size_t size) { | 
					
						
							|  |  |  |     FILE* out_file = perf_jit_map_state.perf_map; | 
					
						
							|  |  |  |     const char* ptr = (const char*)(buffer); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     while (size > 0) { | 
					
						
							|  |  |  |         const size_t written = fwrite(ptr, 1, size, out_file); | 
					
						
							|  |  |  |         if (written == 0) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |             Py_UNREACHABLE();  // Write failure - should be very rare
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         size -= written; | 
					
						
							|  |  |  |         ptr += written; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Write the jitdump file header | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The header must be written exactly once at the beginning of each jitdump | 
					
						
							|  |  |  |  * file. It provides metadata that perf uses to parse the rest of the file. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   pid: Process ID to include in the header | 
					
						
							|  |  |  |  *   out_file: File handle to write to (currently unused, uses global state) | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static void perf_map_jit_write_header(int pid, FILE* out_file) { | 
					
						
							|  |  |  |     Header header; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Initialize header with required values */ | 
					
						
							|  |  |  |     header.magic = 0x4A695444;                    // "JiTD" magic number
 | 
					
						
							|  |  |  |     header.version = 1;                           // Current jitdump version
 | 
					
						
							|  |  |  |     header.size = sizeof(Header);                 // Header size for validation
 | 
					
						
							|  |  |  |     header.elf_mach_target = GetElfMachineArchitecture();  // Target architecture
 | 
					
						
							|  |  |  |     header.process_id = pid;                      // Process identifier
 | 
					
						
							|  |  |  |     header.time_stamp = get_current_time_microseconds();   // Creation time
 | 
					
						
							|  |  |  |     header.flags = 0;                             // No special flags currently used
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     perf_map_jit_write_fully(&header, sizeof(header)); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              DWARF CONSTANTS AND UTILITIES
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * DWARF (Debug With Arbitrary Record Formats) constants | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * DWARF is a debugging data format used to provide stack unwinding information. | 
					
						
							|  |  |  |  * These constants define the various encoding types and opcodes used in | 
					
						
							|  |  |  |  * DWARF Call Frame Information (CFI) records. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
/*
 * DWARF Call Frame Information version.
 * NOTE(review): presumably emitted as the version field of the CIE (Common
 * Information Entry); the use site is outside this part of the file.
 */
#define DWRF_CIE_VERSION 1
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* DWARF CFA (Call Frame Address) opcodes */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | enum { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     DWRF_CFA_nop = 0x0,                    // No operation
 | 
					
						
							|  |  |  |     DWRF_CFA_offset_extended = 0x5,        // Extended offset instruction
 | 
					
						
							|  |  |  |     DWRF_CFA_def_cfa = 0xc,               // Define CFA rule
 | 
					
						
							|  |  |  |     DWRF_CFA_def_cfa_offset = 0xe,        // Define CFA offset
 | 
					
						
							|  |  |  |     DWRF_CFA_offset_extended_sf = 0x11,   // Extended signed offset
 | 
					
						
							|  |  |  |     DWRF_CFA_advance_loc = 0x40,          // Advance location counter
 | 
					
						
							|  |  |  |     DWRF_CFA_offset = 0x80                // Simple offset instruction
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* DWARF Exception Handling pointer encodings */ | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  |     DWRF_EH_PE_absptr = 0x00,             // Absolute pointer
 | 
					
						
							|  |  |  |     DWRF_EH_PE_omit = 0xff,               // Omitted value
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Data type encodings */ | 
					
						
							|  |  |  |     DWRF_EH_PE_uleb128 = 0x01,            // Unsigned LEB128
 | 
					
						
							|  |  |  |     DWRF_EH_PE_udata2 = 0x02,             // Unsigned 2-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_udata4 = 0x03,             // Unsigned 4-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_udata8 = 0x04,             // Unsigned 8-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_sleb128 = 0x09,            // Signed LEB128
 | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata2 = 0x0a,             // Signed 2-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata4 = 0x0b,             // Signed 4-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata8 = 0x0c,             // Signed 8-byte
 | 
					
						
							|  |  |  |     DWRF_EH_PE_signed = 0x08,             // Signed flag
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Reference type encodings */ | 
					
						
							|  |  |  |     DWRF_EH_PE_pcrel = 0x10,              // PC-relative
 | 
					
						
							|  |  |  |     DWRF_EH_PE_textrel = 0x20,            // Text-relative
 | 
					
						
							|  |  |  |     DWRF_EH_PE_datarel = 0x30,            // Data-relative
 | 
					
						
							|  |  |  |     DWRF_EH_PE_funcrel = 0x40,            // Function-relative
 | 
					
						
							|  |  |  |     DWRF_EH_PE_aligned = 0x50,            // Aligned
 | 
					
						
							|  |  |  |     DWRF_EH_PE_indirect = 0x80            // Indirect
 | 
					
						
							|  |  |  | }; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Additional DWARF constants for debug information */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | enum { DWRF_TAG_compile_unit = 0x11 }; | 
					
						
							|  |  |  | enum { DWRF_children_no = 0, DWRF_children_yes = 1 }; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | enum { | 
					
						
							|  |  |  |     DWRF_AT_name = 0x03,         // Name attribute
 | 
					
						
							|  |  |  |     DWRF_AT_stmt_list = 0x10,    // Statement list
 | 
					
						
							|  |  |  |     DWRF_AT_low_pc = 0x11,       // Low PC address
 | 
					
						
							|  |  |  |     DWRF_AT_high_pc = 0x12       // High PC address
 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  |     DWRF_FORM_addr = 0x01,       // Address form
 | 
					
						
							|  |  |  |     DWRF_FORM_data4 = 0x06,      // 4-byte data
 | 
					
						
							|  |  |  |     DWRF_FORM_string = 0x08      // String form
 | 
					
						
							|  |  |  | }; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Line number program opcodes */ | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  |     DWRF_LNS_extended_op = 0,    // Extended opcode
 | 
					
						
							|  |  |  |     DWRF_LNS_copy = 1,           // Copy operation
 | 
					
						
							|  |  |  |     DWRF_LNS_advance_pc = 2,     // Advance program counter
 | 
					
						
							|  |  |  |     DWRF_LNS_advance_line = 3    // Advance line number
 | 
					
						
							|  |  |  | }; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* Line number extended opcodes */ | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  |     DWRF_LNE_end_sequence = 1,   // End of sequence
 | 
					
						
							|  |  |  |     DWRF_LNE_set_address = 2     // Set address
 | 
					
						
							|  |  |  | }; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Architecture-specific DWARF register numbers | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * These constants define the register numbering scheme used by DWARF | 
					
						
							|  |  |  |  * for each supported architecture. The numbers must match the ABI | 
					
						
							|  |  |  |  * specification for proper stack unwinding. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | enum { | 
					
						
							|  |  |  | #ifdef __x86_64__
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* x86_64 register numbering (note: order is defined by x86_64 ABI) */ | 
					
						
							|  |  |  |     DWRF_REG_AX,    // RAX
 | 
					
						
							|  |  |  |     DWRF_REG_DX,    // RDX
 | 
					
						
							|  |  |  |     DWRF_REG_CX,    // RCX
 | 
					
						
							|  |  |  |     DWRF_REG_BX,    // RBX
 | 
					
						
							|  |  |  |     DWRF_REG_SI,    // RSI
 | 
					
						
							|  |  |  |     DWRF_REG_DI,    // RDI
 | 
					
						
							|  |  |  |     DWRF_REG_BP,    // RBP
 | 
					
						
							|  |  |  |     DWRF_REG_SP,    // RSP
 | 
					
						
							|  |  |  |     DWRF_REG_8,     // R8
 | 
					
						
							|  |  |  |     DWRF_REG_9,     // R9
 | 
					
						
							|  |  |  |     DWRF_REG_10,    // R10
 | 
					
						
							|  |  |  |     DWRF_REG_11,    // R11
 | 
					
						
							|  |  |  |     DWRF_REG_12,    // R12
 | 
					
						
							|  |  |  |     DWRF_REG_13,    // R13
 | 
					
						
							|  |  |  |     DWRF_REG_14,    // R14
 | 
					
						
							|  |  |  |     DWRF_REG_15,    // R15
 | 
					
						
							|  |  |  |     DWRF_REG_RA,    // Return address (RIP)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* AArch64 register numbering */ | 
					
						
							|  |  |  |     DWRF_REG_FP = 29,  // Frame Pointer
 | 
					
						
							|  |  |  |     DWRF_REG_RA = 30,  // Link register (return address)
 | 
					
						
							|  |  |  |     DWRF_REG_SP = 31,  // Stack pointer
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #else
 | 
					
						
							|  |  |  | #    error "Unsupported target architecture"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /* DWARF encoding constants used in EH frame headers */ | 
					
						
							|  |  |  | static const uint8_t DwarfUData4 = 0x03;     // Unsigned 4-byte data
 | 
					
						
							|  |  |  | static const uint8_t DwarfSData4 = 0x0b;     // Signed 4-byte data
 | 
					
						
							|  |  |  | static const uint8_t DwarfPcRel = 0x10;      // PC-relative encoding
 | 
					
						
							|  |  |  | static const uint8_t DwarfDataRel = 0x30;    // Data-relative encoding
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              ELF OBJECT CONTEXT
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Context for building ELF/DWARF structures | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This structure maintains state while constructing DWARF unwind information. | 
					
						
							|  |  |  |  * It acts as a simple buffer manager with pointers to track current position | 
					
						
							|  |  |  |  * and important landmarks within the buffer. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | typedef struct ELFObjectContext { | 
					
						
							|  |  |  |     uint8_t* p;            // Current write position in buffer
 | 
					
						
							|  |  |  |     uint8_t* startp;       // Start of buffer (for offset calculations)
 | 
					
						
							|  |  |  |     uint8_t* eh_frame_p;   // Start of EH frame data (for relative offsets)
 | 
					
						
							|  |  |  |     uint32_t code_size;    // Size of the code being described
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } ELFObjectContext; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * EH Frame Header structure for DWARF unwinding | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This structure provides metadata about the DWARF unwinding information | 
					
						
							|  |  |  |  * that follows. It's required by the perf jitdump format to enable proper | 
					
						
							|  |  |  |  * stack unwinding during profiling. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     unsigned char version;           // EH frame version (always 1)
 | 
					
						
							|  |  |  |     unsigned char eh_frame_ptr_enc;  // Encoding of EH frame pointer
 | 
					
						
							|  |  |  |     unsigned char fde_count_enc;     // Encoding of FDE count
 | 
					
						
							|  |  |  |     unsigned char table_enc;         // Encoding of table entries
 | 
					
						
							|  |  |  |     int32_t eh_frame_ptr;           // Pointer to EH frame data
 | 
					
						
							|  |  |  |     int32_t eh_fde_count;           // Number of FDEs (Frame Description Entries)
 | 
					
						
							|  |  |  |     int32_t from;                   // Start address of code range
 | 
					
						
							|  |  |  |     int32_t to;                     // End address of code range
 | 
					
						
							|  |  |  | } EhFrameHeader; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              DWARF GENERATION UTILITIES
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Append a null-terminated string to the ELF context buffer | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   ctx: ELF object context | 
					
						
							|  |  |  |  *   str: String to append (must be null-terminated) | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: Offset from start of buffer where string was written | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static uint32_t elfctx_append_string(ELFObjectContext* ctx, const char* str) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     uint8_t* p = ctx->p; | 
					
						
							|  |  |  |     uint32_t ofs = (uint32_t)(p - ctx->startp); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Copy string including null terminator */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     do { | 
					
						
							|  |  |  |         *p++ = (uint8_t)*str; | 
					
						
							|  |  |  |     } while (*str++); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ctx->p = p; | 
					
						
							|  |  |  |     return ofs; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Append a SLEB128 (Signed Little Endian Base 128) value | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * SLEB128 is a variable-length encoding used extensively in DWARF. | 
					
						
							|  |  |  |  * It efficiently encodes small numbers in fewer bytes. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   ctx: ELF object context | 
					
						
							|  |  |  |  *   v: Signed value to encode | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static void elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     uint8_t* p = ctx->p; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Encode 7 bits at a time, with continuation bit in MSB */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         *p++ = (uint8_t)((v & 0x7f) | 0x80);  // Set continuation bit
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     *p++ = (uint8_t)(v & 0x7f);  // Final byte without continuation bit
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ctx->p = p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  |  * Append a ULEB128 (Unsigned Little Endian Base 128) value | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Similar to SLEB128 but for unsigned values. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   ctx: ELF object context | 
					
						
							|  |  |  |  *   v: Unsigned value to encode | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static void elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     uint8_t* p = ctx->p; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Encode 7 bits at a time, with continuation bit in MSB */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     for (; v >= 0x80; v >>= 7) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         *p++ = (char)((v & 0x7f) | 0x80);  // Set continuation bit
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     *p++ = (char)v;  // Final byte without continuation bit
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ctx->p = p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
/*
 * Macros for generating DWARF structures
 *
 * These macros provide a convenient way to write various data types
 * to the DWARF buffer while automatically advancing the pointer.
 *
 * They are not self-contained: each one expands to an expression that uses
 * a variable named `p` (the byte cursor) from the scope where it is used,
 * and DWRF_UV / DWRF_SV / DWRF_STR additionally use a variable named `ctx`.
 *
 * The multi-byte writers (DWRF_U16, DWRF_U32, DWRF_ADDR) store through a
 * casted pointer, i.e. in host byte order and without adjusting `p` for
 * alignment first.
 *
 * The LEB128 and string writers sync `p` into ctx->p, call the matching
 * elfctx_append_*() helper, then reload `p` from ctx->p.
 */
#define DWRF_U8(x) (*p++ = (x))                                    // Write unsigned 8-bit
#define DWRF_I8(x) (*(int8_t*)p = (x), p++)                       // Write signed 8-bit
#define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2)                 // Write unsigned 16-bit
#define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4)                 // Write unsigned 32-bit
#define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t)) // Write address
#define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p) // Write ULEB128
#define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p) // Write SLEB128
#define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p) // Write string
 | 
					
						
							|  |  |  | 
 | 
					
						
/*
 * Align to specified boundary with NOP instructions
 *
 * Appends DWRF_CFA_nop bytes until the address held in `p` has no bits set
 * in the mask (s)-1. The mask test only checks alignment when `s` is a power
 * of two. It is the absolute address in `p` that gets aligned, not an offset
 * from the start of the buffer.
 *
 * Expands to a bare `while` statement that uses the `p` variable of the
 * enclosing scope.
 */
#define DWRF_ALIGNNOP(s)                                          \
    while ((uintptr_t)p & ((s)-1)) {                              \
        *p++ = DWRF_CFA_nop;                                       \
    }
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
/*
 * Write a DWARF section with automatic size calculation
 *
 * Reserves a 4-byte length field at the current position of `p`, runs
 * `stmt` (which is expected to emit the section body by advancing `p`), and
 * then stores in that field the number of bytes written after it, i.e. the
 * length excludes the 4-byte field itself.
 *
 * `name` is only pasted into the name of the local length pointer
 * (szp_<name>). The expansion is a bare brace-enclosed block that uses the
 * `p` variable of the enclosing scope.
 */
#define DWRF_SECTION(name, stmt)                                  \
    {                                                             \
        uint32_t* szp_##name = (uint32_t*)p;                      \
        p += 4;                                                   \
        stmt;                                                     \
        *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4); \
    }
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              DWARF EH FRAME GENERATION
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Initialize DWARF .eh_frame section for a code region | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The .eh_frame section contains Call Frame Information (CFI) that describes | 
					
						
							|  |  |  |  * how to unwind the stack at any point in the code. This is essential for | 
					
						
							|  |  |  |  * proper profiling as it allows perf to generate accurate call graphs. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The function generates two main components: | 
					
						
							|  |  |  |  * 1. CIE (Common Information Entry) - describes calling conventions | 
					
						
							|  |  |  |  * 2. FDE (Frame Description Entry) - describes specific function unwinding | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   ctx: ELF object context containing code size and buffer pointers | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static void elf_init_ehframe(ELFObjectContext* ctx) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     uint8_t* p = ctx->p; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     uint8_t* framep = p;  // Remember start of frame data
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |     * DWARF Unwind Table for Trampoline Function | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * This section defines DWARF Call Frame Information (CFI) using encoded macros | 
					
						
							|  |  |  |     * like `DWRF_U8`, `DWRF_UV`, and `DWRF_SECTION` to describe how the trampoline function | 
					
						
							|  |  |  |     * preserves and restores registers. This is used by profiling tools (e.g., `perf`) | 
					
						
							|  |  |  |     * and debuggers for stack unwinding in JIT-compiled code. | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * ------------------------------------------------- | 
					
						
							|  |  |  |     * TO REGENERATE THIS TABLE FROM GCC OBJECTS: | 
					
						
							|  |  |  |     * ------------------------------------------------- | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * 1. Create a trampoline source file (e.g., `trampoline.c`): | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *      #include <Python.h> | 
					
						
							|  |  |  |     *      typedef PyObject* (*py_evaluator)(void*, void*, int); | 
					
						
							|  |  |  |     *      PyObject* trampoline(void *ts, void *f, int throwflag, py_evaluator evaluator) { | 
					
						
							|  |  |  |     *          return evaluator(ts, f, throwflag); | 
					
						
							|  |  |  |     *      } | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * 2. Compile to an object file with frame pointer preservation: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *      gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * 3. Extract DWARF unwind info from the object file: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *      readelf -w trampoline.o | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *    Example output from `.eh_frame`: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *      00000000 CIE | 
					
						
							|  |  |  |     *        Version:               1 | 
					
						
							|  |  |  |     *        Augmentation:          "zR" | 
					
						
							|  |  |  |     *        Code alignment factor: 4 | 
					
						
							|  |  |  |     *        Data alignment factor: -8 | 
					
						
							|  |  |  |     *        Return address column: 30 | 
					
						
							|  |  |  |     *        DW_CFA_def_cfa: r31 (sp) ofs 0 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *      00000014 FDE cie=00000000 pc=0..14 | 
					
						
							|  |  |  |     *        DW_CFA_advance_loc: 4 | 
					
						
							|  |  |  |     *        DW_CFA_def_cfa_offset: 16 | 
					
						
							|  |  |  |     *        DW_CFA_offset: r29 at cfa-16 | 
					
						
							|  |  |  |     *        DW_CFA_offset: r30 at cfa-8 | 
					
						
							|  |  |  |     *        DW_CFA_advance_loc: 12 | 
					
						
							|  |  |  |     *        DW_CFA_restore: r30 | 
					
						
							|  |  |  |     *        DW_CFA_restore: r29 | 
					
						
							|  |  |  |     *        DW_CFA_def_cfa_offset: 0 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * -- These values can be verified by comparing with `readelf -w` or `llvm-dwarfdump --eh-frame`. | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * ---------------------------------- | 
					
						
							|  |  |  |     * HOW TO TRANSLATE TO DWRF_* MACROS: | 
					
						
							|  |  |  |     * ---------------------------------- | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * After compiling your trampoline with: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *     gcc trampoline.c -I. -I./Include -O2 -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -c | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * run: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *     readelf -w trampoline.o | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * to inspect the generated `.eh_frame` data. You will see two main components: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *     1. A CIE (Common Information Entry): shared configuration used by all FDEs. | 
					
						
							|  |  |  |     *     2. An FDE (Frame Description Entry): function-specific unwind instructions. | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * --------------------- | 
					
						
							|  |  |  |     * Translating the CIE: | 
					
						
							|  |  |  |     * --------------------- | 
					
						
							|  |  |  |     * From `readelf -w`, you might see: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *   00000000 0000000000000010 00000000 CIE | 
					
						
							|  |  |  |     *     Version:               1 | 
					
						
							|  |  |  |     *     Augmentation:          "zR" | 
					
						
							|  |  |  |     *     Code alignment factor: 4 | 
					
						
							|  |  |  |     *     Data alignment factor: -8 | 
					
						
							|  |  |  |     *     Return address column: 30 | 
					
						
							|  |  |  |     *     Augmentation data:     1b | 
					
						
							|  |  |  |     *     DW_CFA_def_cfa: r31 (sp) ofs 0 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * Map this to: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *     DWRF_SECTION(CIE, | 
					
						
							|  |  |  |     *         DWRF_U32(0);                             // CIE ID (always 0 for CIEs)
 | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CIE_VERSION);              // Version: 1
 | 
					
						
							|  |  |  |     *         DWRF_STR("zR");                         // Augmentation string "zR"
 | 
					
						
							|  |  |  |     *         DWRF_UV(4);                             // Code alignment factor = 4
 | 
					
						
							|  |  |  |     *         DWRF_SV(-8);                            // Data alignment factor = -8
 | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_REG_RA);                   // Return address register (e.g., x30 = 30)
 | 
					
						
							|  |  |  |     *         DWRF_UV(1);                             // Augmentation data length = 1
 | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // Encoding for FDE pointers
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_def_cfa);              // DW_CFA_def_cfa
 | 
					
						
							|  |  |  |     *         DWRF_UV(DWRF_REG_SP);                   // Register: SP (r31)
 | 
					
						
							|  |  |  |     *         DWRF_UV(0);                             // Offset = 0
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_ALIGNNOP(sizeof(uintptr_t));       // Align to pointer size boundary
 | 
					
						
							|  |  |  |     *     ) | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * Notes: | 
					
						
							|  |  |  |     *   - Use `DWRF_UV` for unsigned LEB128, `DWRF_SV` for signed LEB128. | 
					
						
							|  |  |  |     *   - `DWRF_REG_RA` and `DWRF_REG_SP` are architecture-defined constants. | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * --------------------- | 
					
						
							|  |  |  |     * Translating the FDE: | 
					
						
							|  |  |  |     * --------------------- | 
					
						
							|  |  |  |     * From `readelf -w`: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *   00000014 0000000000000020 00000018 FDE cie=00000000 pc=0000000000000000..0000000000000014 | 
					
						
							|  |  |  |     *     DW_CFA_advance_loc: 4 | 
					
						
							|  |  |  |     *     DW_CFA_def_cfa_offset: 16 | 
					
						
							|  |  |  |     *     DW_CFA_offset: r29 at cfa-16 | 
					
						
							|  |  |  |     *     DW_CFA_offset: r30 at cfa-8 | 
					
						
							|  |  |  |     *     DW_CFA_advance_loc: 12 | 
					
						
							|  |  |  |     *     DW_CFA_restore: r30 | 
					
						
							|  |  |  |     *     DW_CFA_restore: r29 | 
					
						
							|  |  |  |     *     DW_CFA_def_cfa_offset: 0 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * Map the FDE header and instructions to: | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *     DWRF_SECTION(FDE, | 
					
						
							|  |  |  |     *         DWRF_U32((uint32_t)(p - framep));       // Offset to CIE (relative from here)
 | 
					
						
							|  |  |  |     *         DWRF_U32(-0x30);                        // Initial PC-relative location of the code
 | 
					
						
							|  |  |  |     *         DWRF_U32(ctx->code_size);               // Code range covered by this FDE
 | 
					
						
							|  |  |  |     *         DWRF_U8(0);                             // Augmentation data length (none)
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_advance_loc | 1);      // Advance location by 1 unit (1 * 4 = 4 bytes)
 | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_def_cfa_offset);       // CFA = SP + 16
 | 
					
						
							|  |  |  |     *         DWRF_UV(16); | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Save x29 (frame pointer)
 | 
					
						
							|  |  |  |     *         DWRF_UV(2);                             // At offset 2 * 8 = 16 bytes
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Save x30 (return address)
 | 
					
						
							|  |  |  |     *         DWRF_UV(1);                             // At offset 1 * 8 = 8 bytes
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_advance_loc | 3);      // Advance location by 3 units (3 * 4 = 12 bytes)
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // Restore x30
 | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP); // Restore x29
 | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     *         DWRF_U8(DWRF_CFA_def_cfa_offset);       // CFA = SP
 | 
					
						
							|  |  |  |     *         DWRF_UV(0); | 
					
						
							|  |  |  |     *     ) | 
					
						
							|  |  |  |     * | 
					
						
							|  |  |  |     * To regenerate: | 
					
						
							|  |  |  |     *   1. Get the `code alignment factor`, `data alignment factor`, and `RA column` from the CIE. | 
					
						
							|  |  |  |     *   2. Note the range of the function from the FDE's `pc=...` line and map it to the JIT code as | 
					
						
							|  |  |  |     *      the code is in a different address space every time. | 
					
						
							|  |  |  |     *   3. For each `DW_CFA_*` entry, use the corresponding `DWRF_*` macro: | 
					
						
							|  |  |  |     *        - `DW_CFA_def_cfa_offset`     → DWRF_U8(DWRF_CFA_def_cfa_offset), DWRF_UV(value) | 
					
						
							|  |  |  |     *        - `DW_CFA_offset: rX`         → DWRF_U8(DWRF_CFA_offset | reg), DWRF_UV(offset) | 
					
						
							|  |  |  |     *        - `DW_CFA_restore: rX`        → DWRF_U8(DWRF_CFA_offset | reg) // restore is same as reusing offset
 | 
					
						
							|  |  |  |     *        - `DW_CFA_advance_loc: N`     → DWRF_U8(DWRF_CFA_advance_loc | (N / code_alignment_factor)) | 
					
						
							|  |  |  |     *   4. Use `DWRF_REG_FP`, `DWRF_REG_RA`, etc., for register numbers. | 
					
						
							|  |  |  |     *   5. Use `sizeof(uintptr_t)` (typically 8) for pointer size calculations and alignment. | 
					
						
							|  |  |  |     */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Emit DWARF EH CIE (Common Information Entry) | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * The CIE describes the calling conventions and basic unwinding rules | 
					
						
							|  |  |  |      * that apply to all functions in this compilation unit. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     DWRF_SECTION(CIE, | 
					
						
							|  |  |  |         DWRF_U32(0);                           // CIE ID (0 indicates this is a CIE)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CIE_VERSION);            // CIE version (1)
 | 
					
						
							|  |  |  |         DWRF_STR("zR");                       // Augmentation string ("zR" = augmentation data with FDE pointer encoding)
 | 
					
						
							|  |  |  |         DWRF_UV(1);                           // Code alignment factor
 | 
					
						
							|  |  |  |         DWRF_SV(-(int64_t)sizeof(uintptr_t)); // Data alignment factor (negative)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_REG_RA);                 // Return address register number
 | 
					
						
							|  |  |  |         DWRF_UV(1);                           // Augmentation data length
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); // FDE pointer encoding
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /* Initial CFI instructions - describe default calling convention */ | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_def_cfa);            // Define CFA (Call Frame Address)
 | 
					
						
							|  |  |  |         DWRF_UV(DWRF_REG_SP);                 // CFA = SP register
 | 
					
						
							|  |  |  |         DWRF_UV(sizeof(uintptr_t));           // CFA = SP + pointer_size
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); // Return address is saved
 | 
					
						
							|  |  |  |         DWRF_UV(1);                           // At offset 1 from CFA
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         DWRF_ALIGNNOP(sizeof(uintptr_t));     // Align to pointer boundary
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     ctx->eh_frame_p = p;  // Remember start of FDE data
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Emit DWARF EH FDE (Frame Description Entry) | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * The FDE describes unwinding information specific to this function. | 
					
						
							|  |  |  |      * It references the CIE and provides function-specific CFI instructions. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     DWRF_SECTION(FDE, | 
					
						
							|  |  |  |         DWRF_U32((uint32_t)(p - framep));     // Offset to CIE (backwards reference)
 | 
					
						
							|  |  |  |         DWRF_U32(-0x30);                      // Machine code offset relative to .text
 | 
					
						
							|  |  |  |         DWRF_U32(ctx->code_size);             // Address range covered by this FDE (code length)
 | 
					
						
							|  |  |  |         DWRF_U8(0);                           // Augmentation data length (none)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         /*
 | 
					
						
							|  |  |  |          * Architecture-specific CFI instructions | 
					
						
							|  |  |  |          * | 
					
						
							|  |  |  |          * These instructions describe how registers are saved and restored | 
					
						
							|  |  |  |          * during function calls. Each architecture has different calling | 
					
						
							|  |  |  |          * conventions and register usage patterns. | 
					
						
							|  |  |  |          */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #ifdef __x86_64__
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         /* x86_64 calling convention unwinding rules */ | 
					
						
							| 
									
										
										
										
											2025-06-03 09:09:43 +02:00
										 |  |  | #  if defined(__CET__) && (__CET__ & 1)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_advance_loc | 8);    // Advance location by 8 bytes when CET protection is enabled
 | 
					
						
							|  |  |  | #  else
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         DWRF_U8(DWRF_CFA_advance_loc | 4);    // Advance location by 4 bytes
 | 
					
						
							| 
									
										
										
										
											2025-06-03 09:09:43 +02:00
										 |  |  | #  endif
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         DWRF_U8(DWRF_CFA_def_cfa_offset);     // Redefine CFA offset
 | 
					
						
							|  |  |  |         DWRF_UV(16);                          // New offset: SP + 16
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_advance_loc | 6);    // Advance location by 6 bytes
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_def_cfa_offset);     // Redefine CFA offset
 | 
					
						
							|  |  |  |         DWRF_UV(8);                           // New offset: SP + 8
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         /* AArch64 calling convention unwinding rules */ | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_advance_loc | 1);        // Advance location by 1 instruction (stp x29, x30)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_def_cfa_offset);         // Redefine CFA offset
 | 
					
						
							|  |  |  |         DWRF_UV(16);                              // CFA = SP + 16 (stack pointer after push)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP);   // Frame pointer (x29) saved
 | 
					
						
							|  |  |  |         DWRF_UV(2);                               // At offset 2 from CFA (2 * 8 = 16 bytes)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA);   // Link register (x30) saved
 | 
					
						
							|  |  |  |         DWRF_UV(1);                               // At offset 1 from CFA (1 * 8 = 8 bytes)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_advance_loc | 3);        // Advance by 3 instructions (mov x16, x3; mov x29, sp; ldp...)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_offset | DWRF_REG_FP);   // Restore frame pointer (x29)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA);   // Restore link register (x30)
 | 
					
						
							|  |  |  |         DWRF_U8(DWRF_CFA_def_cfa_offset);         // Final CFA adjustment
 | 
					
						
							|  |  |  |         DWRF_UV(0);                               // CFA = SP + 0 (stack restored)
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #else
 | 
					
						
							|  |  |  | #    error "Unsupported target architecture"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         DWRF_ALIGNNOP(sizeof(uintptr_t));     // Align to pointer boundary
 | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ctx->p = p;  // Update context pointer to end of generated data
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              JITDUMP INITIALIZATION
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Initialize the perf jitdump interface | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This function sets up everything needed to generate jitdump files: | 
					
						
							|  |  |  |  * 1. Creates the jitdump file with a unique name | 
					
						
							|  |  |  |  * 2. Maps the first page to signal perf that we're using the interface | 
					
						
							|  |  |  |  * 3. Writes the jitdump header | 
					
						
							|  |  |  |  * 4. Initializes synchronization primitives | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The memory mapping is crucial - perf detects jitdump files by scanning | 
					
						
							|  |  |  |  * for processes that have mapped files matching the pattern /tmp/jit-*.dump | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: Pointer to initialized state, or NULL on failure | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | static void* perf_map_jit_init(void) { | 
					
						
							|  |  |  |     char filename[100]; | 
					
						
							|  |  |  |     int pid = getpid(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Create unique filename based on process ID */ | 
					
						
							|  |  |  |     snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Create/open the jitdump file with appropriate permissions */ | 
					
						
							|  |  |  |     const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); | 
					
						
							|  |  |  |     if (fd == -1) { | 
					
						
							|  |  |  |         return NULL;  // Failed to create file
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Get system page size for memory mapping */ | 
					
						
							|  |  |  |     const long page_size = sysconf(_SC_PAGESIZE); | 
					
						
							|  |  |  |     if (page_size == -1) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL;  // Failed to get page size
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Map the first page of the jitdump file | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * This memory mapping serves as a signal to perf that this process | 
					
						
							|  |  |  |      * is generating JIT code. Perf scans /proc/.../maps looking for mapped | 
					
						
							|  |  |  |      * files that match the jitdump naming pattern. | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * The mapping must be PROT_READ | PROT_EXEC to be detected by perf. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     perf_jit_map_state.mapped_buffer = mmap( | 
					
						
							|  |  |  |         NULL,                    // Let kernel choose address
 | 
					
						
							|  |  |  |         page_size,               // Map one page
 | 
					
						
							|  |  |  |         PROT_READ | PROT_EXEC,   // Read and execute permissions (required by perf)
 | 
					
						
							|  |  |  |         MAP_PRIVATE,             // Private mapping
 | 
					
						
							|  |  |  |         fd,                      // File descriptor
 | 
					
						
							|  |  |  |         0                        // Offset 0 (first page)
 | 
					
						
							|  |  |  |     ); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (perf_jit_map_state.mapped_buffer == NULL) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL;  // Memory mapping failed
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     perf_jit_map_state.mapped_size = page_size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Convert file descriptor to FILE* for easier I/O operations */ | 
					
						
							|  |  |  |     perf_jit_map_state.perf_map = fdopen(fd, "w+"); | 
					
						
							|  |  |  |     if (perf_jit_map_state.perf_map == NULL) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL;  // Failed to create FILE*
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Set up file buffering for better performance | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * We use a large buffer (2MB) because jitdump files can be written | 
					
						
							|  |  |  |      * frequently during program execution. Buffering reduces system call | 
					
						
							|  |  |  |      * overhead and improves overall performance. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Write the jitdump file header */ | 
					
						
							|  |  |  |     perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Initialize thread synchronization lock | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * Multiple threads may attempt to write to the jitdump file | 
					
						
							|  |  |  |      * simultaneously. This lock ensures thread-safe access to the | 
					
						
							|  |  |  |      * global jitdump state. | 
					
						
							|  |  |  |      */ | 
					
						
							|  |  |  |     perf_jit_map_state.map_lock = PyThread_allocate_lock(); | 
					
						
							|  |  |  |     if (perf_jit_map_state.map_lock == NULL) { | 
					
						
							|  |  |  |         fclose(perf_jit_map_state.perf_map); | 
					
						
							|  |  |  |         return NULL;  // Failed to create lock
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Initialize code ID counter */ | 
					
						
							|  |  |  |     perf_jit_map_state.code_id = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Configure trampoline API with padding information */ | 
					
						
							|  |  |  |     trampoline_api.code_padding = PERF_JIT_CODE_PADDING; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return &perf_jit_map_state; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              MAIN JITDUMP ENTRY WRITING
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Write a complete jitdump entry for a Python function | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This is the main function called by Python's trampoline system whenever | 
					
						
							|  |  |  |  * a new piece of JIT-compiled code needs to be recorded. It writes both | 
					
						
							|  |  |  |  * the unwinding information and the code load event to the jitdump file. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * The function performs these steps: | 
					
						
							|  |  |  |  * 1. Initialize jitdump system if not already done | 
					
						
							|  |  |  |  * 2. Extract function name and filename from Python code object | 
					
						
							|  |  |  |  * 3. Generate DWARF unwinding information | 
					
						
							|  |  |  |  * 4. Write unwinding info event to jitdump file | 
					
						
							|  |  |  |  * 5. Write code load event to jitdump file | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   state: Jitdump state (currently unused, uses global state) | 
					
						
							|  |  |  |  *   code_addr: Address where the compiled code resides | 
					
						
							|  |  |  |  *   code_size: Size of the compiled code in bytes | 
					
						
							|  |  |  |  *   co: Python code object containing metadata | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * IMPORTANT: This function signature is part of Python's internal API | 
					
						
							|  |  |  |  * and must not be changed without coordinating with core Python development. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static void perf_map_jit_write_entry(void *state, const void *code_addr, | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |                                     unsigned int code_size, PyCodeObject *co) | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Initialize jitdump system on first use */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     if (perf_jit_map_state.perf_map == NULL) { | 
					
						
							|  |  |  |         void* ret = perf_map_jit_init(); | 
					
						
							|  |  |  |         if(ret == NULL){ | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |             return;  // Initialization failed, silently abort
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Extract function information from Python code object | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * We create a human-readable function name by combining the qualified | 
					
						
							|  |  |  |      * name (includes class/module context) with the filename. This helps | 
					
						
							|  |  |  |      * developers identify functions in perf reports. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     const char *entry = ""; | 
					
						
							|  |  |  |     if (co->co_qualname != NULL) { | 
					
						
							|  |  |  |         entry = PyUnicode_AsUTF8(co->co_qualname); | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     const char *filename = ""; | 
					
						
							|  |  |  |     if (co->co_filename != NULL) { | 
					
						
							|  |  |  |         filename = PyUnicode_AsUTF8(co->co_filename); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Create formatted function name for perf display | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * Format: "py::<function_name>:<filename>" | 
					
						
							|  |  |  |      * The "py::" prefix helps identify Python functions in mixed-language | 
					
						
							|  |  |  |      * profiles (e.g., when profiling C extensions alongside Python code). | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; | 
					
						
							|  |  |  |     char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); | 
					
						
							|  |  |  |     if (perf_map_entry == NULL) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         return;  // Memory allocation failed
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							|  |  |  |     snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const size_t name_length = strlen(perf_map_entry); | 
					
						
							|  |  |  |     uword base = (uword)code_addr; | 
					
						
							|  |  |  |     uword size = code_size; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Generate DWARF unwinding information | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * DWARF data is essential for proper stack unwinding during profiling. | 
					
						
							|  |  |  |      * Without it, perf cannot generate accurate call graphs, especially | 
					
						
							|  |  |  |      * in optimized code where frame pointers may be omitted. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ELFObjectContext ctx; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     char buffer[1024];  // Buffer for DWARF data (1KB should be sufficient)
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ctx.code_size = code_size; | 
					
						
							|  |  |  |     ctx.startp = ctx.p = (uint8_t*)buffer; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Generate EH frame (Exception Handling frame) data */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     elf_init_ehframe(&ctx); | 
					
						
							|  |  |  |     int eh_frame_size = ctx.p - ctx.startp; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Write Code Unwinding Information Event | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * This event must be written before the code load event to ensure | 
					
						
							|  |  |  |      * perf has the unwinding information available when it processes | 
					
						
							|  |  |  |      * the code region. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     CodeUnwindingInfoEvent ev2; | 
					
						
							|  |  |  |     ev2.base.event = PerfUnwindingInfo; | 
					
						
							|  |  |  |     ev2.base.time_stamp = get_current_monotonic_ticks(); | 
					
						
							|  |  |  |     ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Verify we don't exceed our padding budget */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     ev2.mapped_size = round_up(ev2.unwind_data_size, 16);  // 16-byte alignment
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Calculate total event size with padding */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     int padding_size = round_up(content_size, 8) - content_size;  // 8-byte align
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ev2.base.size = content_size + padding_size; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Write the unwinding info event header */ | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&ev2, sizeof(ev2)); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Write EH Frame Header | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * The EH frame header provides metadata about the DWARF unwinding | 
					
						
							|  |  |  |      * information that follows. It includes pointers and counts that | 
					
						
							|  |  |  |      * help perf navigate the unwinding data efficiently. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     EhFrameHeader f; | 
					
						
							|  |  |  |     f.version = 1; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel;  // PC-relative signed 4-byte
 | 
					
						
							|  |  |  |     f.fde_count_enc = DwarfUData4;                  // Unsigned 4-byte count
 | 
					
						
							|  |  |  |     f.table_enc = DwarfSData4 | DwarfDataRel;       // Data-relative signed 4-byte
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Calculate relative offsets for EH frame navigation */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char)); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     f.eh_fde_count = 1;  // We generate exactly one FDE per function
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     f.from = -(round_up(code_size, 8) + eh_frame_size); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     int cie_size = ctx.eh_frame_p - ctx.startp; | 
					
						
							|  |  |  |     f.to = -(eh_frame_size - cie_size); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Write EH frame data and header */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     perf_map_jit_write_fully(ctx.startp, eh_frame_size); | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&f, sizeof(f)); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Write padding to maintain alignment */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     char padding_bytes[] = "\0\0\0\0\0\0\0\0"; | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&padding_bytes, padding_size); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Write Code Load Event | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * This event tells perf about the new code region. It includes: | 
					
						
							|  |  |  |      * - Memory addresses and sizes | 
					
						
							|  |  |  |      * - Process and thread identification | 
					
						
							|  |  |  |      * - Function name for symbol resolution | 
					
						
							|  |  |  |      * - The actual machine code bytes | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     CodeLoadEvent ev; | 
					
						
							|  |  |  |     ev.base.event = PerfLoad; | 
					
						
							|  |  |  |     ev.base.size = sizeof(ev) + (name_length+1) + size; | 
					
						
							|  |  |  |     ev.base.time_stamp = get_current_monotonic_ticks(); | 
					
						
							|  |  |  |     ev.process_id = getpid(); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     ev.thread_id = syscall(SYS_gettid);  // Get thread ID via system call
 | 
					
						
							|  |  |  |     ev.vma = base;                       // Virtual memory address
 | 
					
						
							|  |  |  |     ev.code_address = base;              // Same as VMA for our use case
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ev.code_size = size; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Assign unique code ID and increment counter */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     perf_jit_map_state.code_id += 1; | 
					
						
							|  |  |  |     ev.code_id = perf_jit_map_state.code_id; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /* Write code load event and associated data */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     perf_map_jit_write_fully(&ev, sizeof(ev)); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     perf_map_jit_write_fully(perf_map_entry, name_length+1);  // Include null terminator
 | 
					
						
							|  |  |  |     perf_map_jit_write_fully((void*)(base), size);           // Copy actual machine code
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Clean up allocated memory */ | 
					
						
							|  |  |  |     PyMem_RawFree(perf_map_entry); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              CLEANUP AND FINALIZATION
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Finalize and cleanup the perf jitdump system | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This function is called when Python is shutting down or when the | 
					
						
							|  |  |  |  * perf trampoline system is being disabled. It ensures all resources | 
					
						
							|  |  |  |  * are properly released and all buffered data is flushed to disk. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Args: | 
					
						
							|  |  |  |  *   state: Jitdump state (currently unused, uses global state) | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Returns: 0 on success | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * IMPORTANT: This function signature is part of Python's internal API | 
					
						
							|  |  |  |  * and must not be changed without coordinating with core Python development. | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | static int perf_map_jit_fini(void* state) { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     /*
 | 
					
						
							|  |  |  |      * Close jitdump file with proper synchronization | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * We need to acquire the lock to ensure no other threads are | 
					
						
							|  |  |  |      * writing to the file when we close it. This prevents corruption | 
					
						
							|  |  |  |      * and ensures all data is properly flushed. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     if (perf_jit_map_state.perf_map != NULL) { | 
					
						
							|  |  |  |         PyThread_acquire_lock(perf_jit_map_state.map_lock, 1); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         fclose(perf_jit_map_state.perf_map);  // This also flushes buffers
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         PyThread_release_lock(perf_jit_map_state.map_lock); | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         /* Clean up synchronization primitive */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         PyThread_free_lock(perf_jit_map_state.map_lock); | 
					
						
							|  |  |  |         perf_jit_map_state.perf_map = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /*
 | 
					
						
							|  |  |  |      * Unmap the memory region | 
					
						
							|  |  |  |      * | 
					
						
							|  |  |  |      * This removes the signal to perf that we were generating JIT code. | 
					
						
							|  |  |  |      * After this point, perf will no longer detect this process as | 
					
						
							|  |  |  |      * having JIT capabilities. | 
					
						
							|  |  |  |      */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     if (perf_jit_map_state.mapped_buffer != NULL) { | 
					
						
							|  |  |  |         munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |         perf_jit_map_state.mapped_buffer = NULL; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     /* Clear global state reference */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     trampoline_api.state = NULL; | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return 0;  // Success
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | // =============================================================================
 | 
					
						
							|  |  |  | //                              PUBLIC API EXPORT
 | 
					
						
							|  |  |  | // =============================================================================
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  |  * Python Perf Callbacks Structure | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * This structure defines the callback interface that Python's trampoline | 
					
						
							|  |  |  |  * system uses to integrate with perf profiling. It contains function | 
					
						
							|  |  |  |  * pointers for initialization, event writing, and cleanup. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * CRITICAL: This structure and its contents are part of Python's internal | 
					
						
							|  |  |  |  * API. The function signatures and behavior must remain stable to maintain | 
					
						
							|  |  |  |  * compatibility with the Python interpreter's perf integration system. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Used by: Python's _PyPerf_Callbacks system in pycore_ceval.h | 
					
						
							|  |  |  |  */ | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  |     &perf_map_jit_init,        // Initialization function
 | 
					
						
							|  |  |  |     &perf_map_jit_write_entry, // Event writing function
 | 
					
						
							|  |  |  |     &perf_map_jit_fini,        // Cleanup function
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-25 21:37:15 +01:00
										 |  |  | #endif /* PY_HAVE_PERF_TRAMPOLINE */
 |