| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | #include "pycore_ceval.h"         // _PyPerf_Callbacks
 | 
					
						
							|  |  |  | #include "pycore_frame.h"
 | 
					
						
							|  |  |  | #include "pycore_interp.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <fcntl.h>
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							|  |  |  | #include <sys/mman.h>             // mmap()
 | 
					
						
							|  |  |  | #include <sys/types.h>
 | 
					
						
							|  |  |  | #include <unistd.h>               // sysconf()
 | 
					
						
							|  |  |  | #include <sys/time.h>           // gettimeofday()
 | 
					
						
							| 
									
										
										
										
											2024-05-05 14:37:32 +02:00
										 |  |  | #include <sys/syscall.h>
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | // ----------------------------------
 | 
					
						
							|  |  |  | //         Perf jitdump API
 | 
					
						
							|  |  |  | // ----------------------------------
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     FILE* perf_map; | 
					
						
							|  |  |  |     PyThread_type_lock map_lock; | 
					
						
							|  |  |  |     void* mapped_buffer; | 
					
						
							|  |  |  |     size_t mapped_size; | 
					
						
							|  |  |  |     int code_id; | 
					
						
							|  |  |  | } PerfMapJitState; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PerfMapJitState perf_jit_map_state; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /*
 | 
					
						
							|  |  |  | Usually the binary and libraries are mapped in separate region like below: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   address -> | 
					
						
							|  |  |  |    --+---------------------+--//--+---------------------+--
 | 
					
						
							|  |  |  |      | .text | .data | ... |      | .text | .data | ... | | 
					
						
							|  |  |  |    --+---------------------+--//--+---------------------+--
 | 
					
						
							|  |  |  |          myprog                      libc.so | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | So it'd be easy and straight-forward to find a mapped binary or library from an | 
					
						
							|  |  |  | address. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | But for JIT code, the code arena only cares about the code section. But the | 
					
						
							|  |  |  | resulting DSOs (which is generated by perf inject -j) contain ELF headers and | 
					
						
							|  |  |  | unwind info too. Then it'd generate following address space with synthesized | 
					
						
							|  |  |  | MMAP events. Let's say it has a sample between address B and C. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                                                sample | 
					
						
							|  |  |  |                                                  | | 
					
						
							|  |  |  |   address ->                         A       B   v   C | 
					
						
							|  |  |  |   --------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |   /tmp/jitted-PID-1.so           | (headers) | .text | unwind info | | 
					
						
							|  |  |  |   /tmp/jitted-PID-2.so                   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |     ... | 
					
						
							|  |  |  |   --------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see | 
					
						
							|  |  |  | the unwind info. If it maps both .text section and unwind sections, the sample | 
					
						
							|  |  |  | could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing | 
					
						
							|  |  |  | which one is right. So to make perf happy we have non-overlapping ranges for each | 
					
						
							|  |  |  | DSO: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   address -> | 
					
						
							|  |  |  |   ------------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  |   /tmp/jitted-PID-0.so   | (headers) | .text | unwind info | | 
					
						
							|  |  |  |   /tmp/jitted-PID-1.so                         | (headers) | .text | unwind info | | 
					
						
							|  |  |  |   /tmp/jitted-PID-2.so                                               | (headers) | .text | unwind info | | 
					
						
							|  |  |  |     ... | 
					
						
							|  |  |  |   ------------------------------------------------------------------------------------------------------- | 
					
						
							|  |  |  | 
 | 
					
						
As the trampolines are constant, we add a constant padding, but in general the
padding needs to be the size of the unwind info rounded to a multiple of 16
bytes. For our trampolines this size is 0x50.
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define PERF_JIT_CODE_PADDING 0x100
 | 
					
						
							|  |  |  | #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef uint64_t uword; | 
					
						
							|  |  |  | typedef const char* CodeComments; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define Pd "d"
 | 
					
						
							|  |  |  | #define MB (1024 * 1024)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define EM_386      3
 | 
					
						
							|  |  |  | #define EM_X86_64   62
 | 
					
						
							|  |  |  | #define EM_ARM      40
 | 
					
						
							|  |  |  | #define EM_AARCH64  183
 | 
					
						
							|  |  |  | #define EM_RISCV    243
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define TARGET_ARCH_IA32   0
 | 
					
						
							|  |  |  | #define TARGET_ARCH_X64    0
 | 
					
						
							|  |  |  | #define TARGET_ARCH_ARM    0
 | 
					
						
							|  |  |  | #define TARGET_ARCH_ARM64  0
 | 
					
						
							|  |  |  | #define TARGET_ARCH_RISCV32 0
 | 
					
						
							|  |  |  | #define TARGET_ARCH_RISCV64 0
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define FLAG_generate_perf_jitdump 0
 | 
					
						
							|  |  |  | #define FLAG_write_protect_code 0
 | 
					
						
							|  |  |  | #define FLAG_write_protect_vm_isolate 0
 | 
					
						
							|  |  |  | #define FLAG_code_comments 0
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define UNREACHABLE()
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static uword GetElfMachineArchitecture(void) { | 
					
						
							|  |  |  | #if TARGET_ARCH_IA32
 | 
					
						
							|  |  |  |     return EM_386; | 
					
						
							|  |  |  | #elif TARGET_ARCH_X64
 | 
					
						
							|  |  |  |     return EM_X86_64; | 
					
						
							|  |  |  | #elif TARGET_ARCH_ARM
 | 
					
						
							|  |  |  |     return EM_ARM; | 
					
						
							|  |  |  | #elif TARGET_ARCH_ARM64
 | 
					
						
							|  |  |  |     return EM_AARCH64; | 
					
						
							|  |  |  | #elif TARGET_ARCH_RISCV32 || TARGET_ARCH_RISCV64
 | 
					
						
							|  |  |  |     return EM_RISCV; | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     UNREACHABLE(); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     uint32_t magic; | 
					
						
							|  |  |  |     uint32_t version; | 
					
						
							|  |  |  |     uint32_t size; | 
					
						
							|  |  |  |     uint32_t elf_mach_target; | 
					
						
							|  |  |  |     uint32_t reserved; | 
					
						
							|  |  |  |     uint32_t process_id; | 
					
						
							|  |  |  |     uint64_t time_stamp; | 
					
						
							|  |  |  |     uint64_t flags; | 
					
						
							|  |  |  | } Header; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  enum PerfEvent { | 
					
						
							|  |  |  |     PerfLoad = 0, | 
					
						
							|  |  |  |     PerfMove = 1, | 
					
						
							|  |  |  |     PerfDebugInfo = 2, | 
					
						
							|  |  |  |     PerfClose = 3, | 
					
						
							|  |  |  |     PerfUnwindingInfo = 4 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct BaseEvent { | 
					
						
							|  |  |  |     uint32_t event; | 
					
						
							|  |  |  |     uint32_t size; | 
					
						
							|  |  |  |     uint64_t time_stamp; | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     struct BaseEvent base; | 
					
						
							|  |  |  |     uint32_t process_id; | 
					
						
							|  |  |  |     uint32_t thread_id; | 
					
						
							|  |  |  |     uint64_t vma; | 
					
						
							|  |  |  |     uint64_t code_address; | 
					
						
							|  |  |  |     uint64_t code_size; | 
					
						
							|  |  |  |     uint64_t code_id; | 
					
						
							|  |  |  | } CodeLoadEvent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     struct BaseEvent base; | 
					
						
							|  |  |  |     uint64_t unwind_data_size; | 
					
						
							|  |  |  |     uint64_t eh_frame_hdr_size; | 
					
						
							|  |  |  |     uint64_t mapped_size; | 
					
						
							|  |  |  | } CodeUnwindingInfoEvent; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const intptr_t nanoseconds_per_second = 1000000000; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // Dwarf encoding constants
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static const uint8_t DwarfUData4 = 0x03; | 
					
						
							|  |  |  | static const uint8_t DwarfSData4 = 0x0b; | 
					
						
							|  |  |  | static const uint8_t DwarfPcRel = 0x10; | 
					
						
							|  |  |  | static const uint8_t DwarfDataRel = 0x30; | 
					
						
							|  |  |  | // static uint8_t DwarfOmit = 0xff;
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     unsigned char version; | 
					
						
							|  |  |  |     unsigned char eh_frame_ptr_enc; | 
					
						
							| 
									
										
										
										
											2024-05-11 16:19:31 -07:00
										 |  |  |     unsigned char fde_count_enc; | 
					
						
							|  |  |  |     unsigned char table_enc; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     int32_t eh_frame_ptr; | 
					
						
							|  |  |  |     int32_t eh_fde_count; | 
					
						
							|  |  |  |     int32_t from; | 
					
						
							|  |  |  |     int32_t to; | 
					
						
							|  |  |  | } EhFrameHeader; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int64_t get_current_monotonic_ticks(void) { | 
					
						
							|  |  |  |     struct timespec ts; | 
					
						
							|  |  |  |     if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { | 
					
						
							|  |  |  |         UNREACHABLE(); | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     // Convert to nanoseconds.
 | 
					
						
							|  |  |  |     int64_t result = ts.tv_sec; | 
					
						
							|  |  |  |     result *= nanoseconds_per_second; | 
					
						
							|  |  |  |     result += ts.tv_nsec; | 
					
						
							|  |  |  |     return result; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int64_t get_current_time_microseconds(void) { | 
					
						
							|  |  |  |   // gettimeofday has microsecond resolution.
 | 
					
						
							|  |  |  |   struct timeval tv; | 
					
						
							|  |  |  |   if (gettimeofday(&tv, NULL) < 0) { | 
					
						
							|  |  |  |     UNREACHABLE(); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static size_t round_up(int64_t value, int64_t multiple) { | 
					
						
							|  |  |  |     if (multiple == 0) { | 
					
						
							|  |  |  |         // Avoid division by zero
 | 
					
						
							|  |  |  |         return value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int64_t remainder = value % multiple; | 
					
						
							|  |  |  |     if (remainder == 0) { | 
					
						
							|  |  |  |         // Value is already a multiple of 'multiple'
 | 
					
						
							|  |  |  |         return value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Calculate the difference to the next multiple
 | 
					
						
							|  |  |  |     int64_t difference = multiple - remainder; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Add the difference to the value
 | 
					
						
							|  |  |  |     int64_t rounded_up_value = value + difference; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return rounded_up_value; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void perf_map_jit_write_fully(const void* buffer, size_t size) { | 
					
						
							|  |  |  |     FILE* out_file = perf_jit_map_state.perf_map; | 
					
						
							|  |  |  |     const char* ptr = (const char*)(buffer); | 
					
						
							|  |  |  |     while (size > 0) { | 
					
						
							|  |  |  |         const size_t written = fwrite(ptr, 1, size, out_file); | 
					
						
							|  |  |  |         if (written == 0) { | 
					
						
							|  |  |  |             UNREACHABLE(); | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         size -= written; | 
					
						
							|  |  |  |         ptr += written; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void perf_map_jit_write_header(int pid, FILE* out_file) { | 
					
						
							|  |  |  |     Header header; | 
					
						
							|  |  |  |     header.magic = 0x4A695444; | 
					
						
							|  |  |  |     header.version = 1; | 
					
						
							|  |  |  |     header.size = sizeof(Header); | 
					
						
							|  |  |  |     header.elf_mach_target = GetElfMachineArchitecture(); | 
					
						
							|  |  |  |     header.process_id = pid; | 
					
						
							|  |  |  |     header.time_stamp = get_current_time_microseconds(); | 
					
						
							|  |  |  |     header.flags = 0; | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&header, sizeof(header)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void* perf_map_jit_init(void) { | 
					
						
							|  |  |  |     char filename[100]; | 
					
						
							|  |  |  |     int pid = getpid(); | 
					
						
							|  |  |  |     snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); | 
					
						
							|  |  |  |     const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); | 
					
						
							|  |  |  |     if (fd == -1) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const long page_size = sysconf(_SC_PAGESIZE);  // NOLINT(runtime/int)
 | 
					
						
							|  |  |  |     if (page_size == -1) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // The perf jit interface forces us to map the first page of the file
 | 
					
						
							|  |  |  |     // to signal that we are using the interface.
 | 
					
						
							|  |  |  |     perf_jit_map_state.mapped_buffer = mmap(NULL, page_size, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0); | 
					
						
							|  |  |  |     if (perf_jit_map_state.mapped_buffer == NULL) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     perf_jit_map_state.mapped_size = page_size; | 
					
						
							|  |  |  |     perf_jit_map_state.perf_map = fdopen(fd, "w+"); | 
					
						
							|  |  |  |     if (perf_jit_map_state.perf_map == NULL) { | 
					
						
							|  |  |  |         close(fd); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); | 
					
						
							|  |  |  |     perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     perf_jit_map_state.map_lock = PyThread_allocate_lock(); | 
					
						
							|  |  |  |     if (perf_jit_map_state.map_lock == NULL) { | 
					
						
							|  |  |  |         fclose(perf_jit_map_state.perf_map); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     perf_jit_map_state.code_id = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-06 18:20:42 +01:00
										 |  |  |     trampoline_api.code_padding = PERF_JIT_CODE_PADDING; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     return &perf_jit_map_state; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* DWARF definitions. */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #define DWRF_CIE_VERSION 1
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  |     DWRF_CFA_nop = 0x0, | 
					
						
							|  |  |  |     DWRF_CFA_offset_extended = 0x5, | 
					
						
							|  |  |  |     DWRF_CFA_def_cfa = 0xc, | 
					
						
							|  |  |  |     DWRF_CFA_def_cfa_offset = 0xe, | 
					
						
							|  |  |  |     DWRF_CFA_offset_extended_sf = 0x11, | 
					
						
							|  |  |  |     DWRF_CFA_advance_loc = 0x40, | 
					
						
							|  |  |  |     DWRF_CFA_offset = 0x80 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum | 
					
						
							|  |  |  |   { | 
					
						
							|  |  |  |     DWRF_EH_PE_absptr = 0x00, | 
					
						
							|  |  |  |     DWRF_EH_PE_omit = 0xff, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* FDE data encoding.  */ | 
					
						
							|  |  |  |     DWRF_EH_PE_uleb128 = 0x01, | 
					
						
							|  |  |  |     DWRF_EH_PE_udata2 = 0x02, | 
					
						
							|  |  |  |     DWRF_EH_PE_udata4 = 0x03, | 
					
						
							|  |  |  |     DWRF_EH_PE_udata8 = 0x04, | 
					
						
							|  |  |  |     DWRF_EH_PE_sleb128 = 0x09, | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata2 = 0x0a, | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata4 = 0x0b, | 
					
						
							|  |  |  |     DWRF_EH_PE_sdata8 = 0x0c, | 
					
						
							|  |  |  |     DWRF_EH_PE_signed = 0x08, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* FDE flags.  */ | 
					
						
							|  |  |  |     DWRF_EH_PE_pcrel = 0x10, | 
					
						
							|  |  |  |     DWRF_EH_PE_textrel = 0x20, | 
					
						
							|  |  |  |     DWRF_EH_PE_datarel = 0x30, | 
					
						
							|  |  |  |     DWRF_EH_PE_funcrel = 0x40, | 
					
						
							|  |  |  |     DWRF_EH_PE_aligned = 0x50, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     DWRF_EH_PE_indirect = 0x80 | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_TAG_compile_unit = 0x11 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_children_no = 0, DWRF_children_yes = 1 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_AT_name = 0x03, DWRF_AT_stmt_list = 0x10, DWRF_AT_low_pc = 0x11, DWRF_AT_high_pc = 0x12 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_FORM_addr = 0x01, DWRF_FORM_data4 = 0x06, DWRF_FORM_string = 0x08 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_LNS_extended_op = 0, DWRF_LNS_copy = 1, DWRF_LNS_advance_pc = 2, DWRF_LNS_advance_line = 3 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { DWRF_LNE_end_sequence = 1, DWRF_LNE_set_address = 2 }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | enum { | 
					
						
							|  |  |  | #ifdef __x86_64__
 | 
					
						
							|  |  |  |     /* Yes, the order is strange, but correct. */ | 
					
						
							|  |  |  |     DWRF_REG_AX, | 
					
						
							|  |  |  |     DWRF_REG_DX, | 
					
						
							|  |  |  |     DWRF_REG_CX, | 
					
						
							|  |  |  |     DWRF_REG_BX, | 
					
						
							|  |  |  |     DWRF_REG_SI, | 
					
						
							|  |  |  |     DWRF_REG_DI, | 
					
						
							|  |  |  |     DWRF_REG_BP, | 
					
						
							|  |  |  |     DWRF_REG_SP, | 
					
						
							|  |  |  |     DWRF_REG_8, | 
					
						
							|  |  |  |     DWRF_REG_9, | 
					
						
							|  |  |  |     DWRF_REG_10, | 
					
						
							|  |  |  |     DWRF_REG_11, | 
					
						
							|  |  |  |     DWRF_REG_12, | 
					
						
							|  |  |  |     DWRF_REG_13, | 
					
						
							|  |  |  |     DWRF_REG_14, | 
					
						
							|  |  |  |     DWRF_REG_15, | 
					
						
							|  |  |  |     DWRF_REG_RA, | 
					
						
							|  |  |  | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
 | 
					
						
							|  |  |  |     DWRF_REG_SP = 31, | 
					
						
							|  |  |  |     DWRF_REG_RA = 30, | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #    error "Unsupported target architecture"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct ELFObjectContext | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint8_t* p; /* Pointer to next address in obj.space. */ | 
					
						
							|  |  |  |     uint8_t* startp; /* Pointer to start address in obj.space. */ | 
					
						
							|  |  |  |     uint8_t* eh_frame_p; /* Pointer to start address in obj.space. */ | 
					
						
							|  |  |  |     uint32_t code_size; /* Size of machine code. */ | 
					
						
							|  |  |  | } ELFObjectContext; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Append a null-terminated string. */ | 
					
						
							|  |  |  | static uint32_t | 
					
						
							|  |  |  | elfctx_append_string(ELFObjectContext* ctx, const char* str) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint8_t* p = ctx->p; | 
					
						
							|  |  |  |     uint32_t ofs = (uint32_t)(p - ctx->startp); | 
					
						
							|  |  |  |     do { | 
					
						
							|  |  |  |         *p++ = (uint8_t)*str; | 
					
						
							|  |  |  |     } while (*str++); | 
					
						
							|  |  |  |     ctx->p = p; | 
					
						
							|  |  |  |     return ofs; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Append a SLEB128 value. */ | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | elfctx_append_sleb128(ELFObjectContext* ctx, int32_t v) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint8_t* p = ctx->p; | 
					
						
							|  |  |  |     for (; (uint32_t)(v + 0x40) >= 0x80; v >>= 7) { | 
					
						
							|  |  |  |         *p++ = (uint8_t)((v & 0x7f) | 0x80); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     *p++ = (uint8_t)(v & 0x7f); | 
					
						
							|  |  |  |     ctx->p = p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Append a ULEB128 to buffer. */ | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | elfctx_append_uleb128(ELFObjectContext* ctx, uint32_t v) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint8_t* p = ctx->p; | 
					
						
							|  |  |  |     for (; v >= 0x80; v >>= 7) { | 
					
						
							|  |  |  |         *p++ = (char)((v & 0x7f) | 0x80); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     *p++ = (char)v; | 
					
						
							|  |  |  |     ctx->p = p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Shortcuts to generate DWARF structures. */ | 
					
						
							|  |  |  | #define DWRF_U8(x) (*p++ = (x))
 | 
					
						
							|  |  |  | #define DWRF_I8(x) (*(int8_t*)p = (x), p++)
 | 
					
						
							|  |  |  | #define DWRF_U16(x) (*(uint16_t*)p = (x), p += 2)
 | 
					
						
							|  |  |  | #define DWRF_U32(x) (*(uint32_t*)p = (x), p += 4)
 | 
					
						
							|  |  |  | #define DWRF_ADDR(x) (*(uintptr_t*)p = (x), p += sizeof(uintptr_t))
 | 
					
						
							|  |  |  | #define DWRF_UV(x) (ctx->p = p, elfctx_append_uleb128(ctx, (x)), p = ctx->p)
 | 
					
						
							|  |  |  | #define DWRF_SV(x) (ctx->p = p, elfctx_append_sleb128(ctx, (x)), p = ctx->p)
 | 
					
						
							|  |  |  | #define DWRF_STR(str) (ctx->p = p, elfctx_append_string(ctx, (str)), p = ctx->p)
 | 
					
						
							|  |  |  | #define DWRF_ALIGNNOP(s)                                                                                \
 | 
					
						
							|  |  |  |     while ((uintptr_t)p & ((s)-1)) {                                                                    \ | 
					
						
							|  |  |  |         *p++ = DWRF_CFA_nop;                                                                            \ | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #define DWRF_SECTION(name, stmt)                                                                        \
 | 
					
						
							|  |  |  |     {                                                                                                   \ | 
					
						
							|  |  |  |         uint32_t* szp_##name = (uint32_t*)p;                                                            \ | 
					
						
							|  |  |  |         p += 4;                                                                                         \ | 
					
						
							|  |  |  |         stmt;                                                                                           \ | 
					
						
							|  |  |  |         *szp_##name = (uint32_t)((p - (uint8_t*)szp_##name) - 4);                                       \ | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /* Initialize .eh_frame section. */ | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | elf_init_ehframe(ELFObjectContext* ctx) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     uint8_t* p = ctx->p; | 
					
						
							|  |  |  |     uint8_t* framep = p; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Emit DWARF EH CIE. */ | 
					
						
							|  |  |  |     DWRF_SECTION(CIE, DWRF_U32(0); /* Offset to CIE itself. */ | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CIE_VERSION); | 
					
						
							|  |  |  |                  DWRF_STR("zR"); /* Augmentation. */ | 
					
						
							|  |  |  |                  DWRF_UV(1); /* Code alignment factor. */ | 
					
						
							|  |  |  |                  DWRF_SV(-(int64_t)sizeof(uintptr_t)); /* Data alignment factor. */ | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_REG_RA); /* Return address register. */ | 
					
						
							|  |  |  |                  DWRF_UV(1); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_EH_PE_pcrel | DWRF_EH_PE_sdata4); /* Augmentation data. */ | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_def_cfa); DWRF_UV(DWRF_REG_SP); DWRF_UV(sizeof(uintptr_t)); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_offset|DWRF_REG_RA); DWRF_UV(1); | 
					
						
							|  |  |  |                  DWRF_ALIGNNOP(sizeof(uintptr_t)); | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ctx->eh_frame_p = p; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /* Emit DWARF EH FDE. */ | 
					
						
							|  |  |  |     DWRF_SECTION(FDE, DWRF_U32((uint32_t)(p - framep)); /* Offset to CIE. */ | 
					
						
							|  |  |  |                  DWRF_U32(-0x30); /* Machine code offset relative to .text. */ | 
					
						
							|  |  |  |                  DWRF_U32(ctx->code_size); /* Machine code length. */ | 
					
						
							|  |  |  |                  DWRF_U8(0); /* Augmentation data. */ | 
					
						
							|  |  |  |     /* Registers saved in CFRAME. */ | 
					
						
							|  |  |  | #ifdef __x86_64__
 | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_advance_loc | 4); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_advance_loc | 6); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(8); | 
					
						
							|  |  |  |     /* Extra registers saved for JIT-compiled code. */ | 
					
						
							|  |  |  | #elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
 | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_advance_loc | 1); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_def_cfa_offset); DWRF_UV(16); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_offset | 29); DWRF_UV(2); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_offset | 30); DWRF_UV(1); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_advance_loc | 3); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_offset | -(64 - 29)); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_offset | -(64 - 30)); | 
					
						
							|  |  |  |                  DWRF_U8(DWRF_CFA_def_cfa_offset); | 
					
						
							|  |  |  |                  DWRF_UV(0); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #    error "Unsupported target architecture"
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |                  DWRF_ALIGNNOP(sizeof(uintptr_t));) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ctx->p = p; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void perf_map_jit_write_entry(void *state, const void *code_addr, | 
					
						
							|  |  |  |                          unsigned int code_size, PyCodeObject *co) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (perf_jit_map_state.perf_map == NULL) { | 
					
						
							|  |  |  |         void* ret = perf_map_jit_init(); | 
					
						
							|  |  |  |         if(ret == NULL){ | 
					
						
							|  |  |  |             return; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const char *entry = ""; | 
					
						
							|  |  |  |     if (co->co_qualname != NULL) { | 
					
						
							|  |  |  |         entry = PyUnicode_AsUTF8(co->co_qualname); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     const char *filename = ""; | 
					
						
							|  |  |  |     if (co->co_filename != NULL) { | 
					
						
							|  |  |  |         filename = PyUnicode_AsUTF8(co->co_filename); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; | 
					
						
							|  |  |  |     char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); | 
					
						
							|  |  |  |     if (perf_map_entry == NULL) { | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const size_t name_length = strlen(perf_map_entry); | 
					
						
							|  |  |  |     uword base = (uword)code_addr; | 
					
						
							|  |  |  |     uword size = code_size; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Write the code unwinding info event.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Create unwinding information (eh frame)
 | 
					
						
							|  |  |  |     ELFObjectContext ctx; | 
					
						
							|  |  |  |     char buffer[1024]; | 
					
						
							|  |  |  |     ctx.code_size = code_size; | 
					
						
							|  |  |  |     ctx.startp = ctx.p = (uint8_t*)buffer; | 
					
						
							|  |  |  |     elf_init_ehframe(&ctx); | 
					
						
							|  |  |  |     int eh_frame_size = ctx.p - ctx.startp; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Populate the unwind info event for perf
 | 
					
						
							|  |  |  |     CodeUnwindingInfoEvent ev2; | 
					
						
							|  |  |  |     ev2.base.event = PerfUnwindingInfo; | 
					
						
							|  |  |  |     ev2.base.time_stamp = get_current_monotonic_ticks(); | 
					
						
							|  |  |  |     ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; | 
					
						
							|  |  |  |     // Ensure we have enough space between DSOs when perf maps them
 | 
					
						
							|  |  |  |     assert(ev2.unwind_data_size <= PERF_JIT_CODE_PADDING); | 
					
						
							|  |  |  |     ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); | 
					
						
							|  |  |  |     ev2.mapped_size = round_up(ev2.unwind_data_size, 16); | 
					
						
							|  |  |  |     int content_size = sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size; | 
					
						
							|  |  |  |     int padding_size = round_up(content_size, 8) - content_size; | 
					
						
							|  |  |  |     ev2.base.size = content_size + padding_size; | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&ev2, sizeof(ev2)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Populate the eh Frame header
 | 
					
						
							|  |  |  |     EhFrameHeader f; | 
					
						
							|  |  |  |     f.version = 1; | 
					
						
							|  |  |  |     f.eh_frame_ptr_enc = DwarfSData4 | DwarfPcRel; | 
					
						
							|  |  |  |     f.fde_count_enc = DwarfUData4; | 
					
						
							|  |  |  |     f.table_enc = DwarfSData4 | DwarfDataRel; | 
					
						
							|  |  |  |     f.eh_frame_ptr = -(eh_frame_size + 4 * sizeof(unsigned char)); | 
					
						
							|  |  |  |     f.eh_fde_count = 1; | 
					
						
							|  |  |  |     f.from = -(round_up(code_size, 8) + eh_frame_size); | 
					
						
							|  |  |  |     int cie_size = ctx.eh_frame_p - ctx.startp; | 
					
						
							|  |  |  |     f.to = -(eh_frame_size - cie_size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     perf_map_jit_write_fully(ctx.startp, eh_frame_size); | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&f, sizeof(f)); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     char padding_bytes[] = "\0\0\0\0\0\0\0\0"; | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&padding_bytes, padding_size); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Write the code load event.
 | 
					
						
							|  |  |  |     CodeLoadEvent ev; | 
					
						
							|  |  |  |     ev.base.event = PerfLoad; | 
					
						
							|  |  |  |     ev.base.size = sizeof(ev) + (name_length+1) + size; | 
					
						
							|  |  |  |     ev.base.time_stamp = get_current_monotonic_ticks(); | 
					
						
							|  |  |  |     ev.process_id = getpid(); | 
					
						
							| 
									
										
										
										
											2024-05-05 14:37:32 +02:00
										 |  |  |     ev.thread_id = syscall(SYS_gettid); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     ev.vma = base; | 
					
						
							|  |  |  |     ev.code_address = base; | 
					
						
							|  |  |  |     ev.code_size = size; | 
					
						
							|  |  |  |     perf_jit_map_state.code_id += 1; | 
					
						
							|  |  |  |     ev.code_id = perf_jit_map_state.code_id; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     perf_map_jit_write_fully(&ev, sizeof(ev)); | 
					
						
							|  |  |  |     perf_map_jit_write_fully(perf_map_entry, name_length+1); | 
					
						
							|  |  |  |     perf_map_jit_write_fully((void*)(base), size); | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int perf_map_jit_fini(void* state) { | 
					
						
							|  |  |  |     if (perf_jit_map_state.perf_map != NULL) { | 
					
						
							|  |  |  |         // close the file
 | 
					
						
							|  |  |  |         PyThread_acquire_lock(perf_jit_map_state.map_lock, 1); | 
					
						
							|  |  |  |         fclose(perf_jit_map_state.perf_map); | 
					
						
							|  |  |  |         PyThread_release_lock(perf_jit_map_state.map_lock); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         // clean up the lock and state
 | 
					
						
							|  |  |  |         PyThread_free_lock(perf_jit_map_state.map_lock); | 
					
						
							|  |  |  |         perf_jit_map_state.perf_map = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (perf_jit_map_state.mapped_buffer != NULL) { | 
					
						
							|  |  |  |         munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     trampoline_api.state = NULL; | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { | 
					
						
							|  |  |  |     &perf_map_jit_init, | 
					
						
							|  |  |  |     &perf_map_jit_write_entry, | 
					
						
							|  |  |  |     &perf_map_jit_fini, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #endif
 |