/* * Python Perf Trampoline Support - JIT Dump Implementation * * This file implements the perf jitdump API for Python's performance profiling * integration. It allows perf (Linux performance analysis tool) to understand * and profile dynamically generated Python bytecode by creating JIT dump files * that perf can inject into its analysis. * * * IMPORTANT: This file exports specific callback functions that are part of * Python's internal API. Do not modify the function signatures or behavior * of exported functions without coordinating with the Python core team. * * Usually the binary and libraries are mapped in separate region like below: * * address -> * --+---------------------+--//--+---------------------+-- * | .text | .data | ... | | .text | .data | ... | * --+---------------------+--//--+---------------------+-- * myprog libc.so * * So it'd be easy and straight-forward to find a mapped binary or library from an * address. * * But for JIT code, the code arena only cares about the code section. But the * resulting DSOs (which is generated by perf inject -j) contain ELF headers and * unwind info too. Then it'd generate following address space with synthesized * MMAP events. Let's say it has a sample between address B and C. * * sample * | * address -> A B v C * --------------------------------------------------------------------------------------------------- * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | * /tmp/jitted-PID-1.so | (headers) | .text | unwind info | * /tmp/jitted-PID-2.so | (headers) | .text | unwind info | * ... * --------------------------------------------------------------------------------------------------- * * If it only maps the .text section, it'd find the jitted-PID-1.so but cannot see * the unwind info. If it maps both .text section and unwind sections, the sample * could be mapped to either jitted-PID-0.so or jitted-PID-1.so and it's confusing * which one is right. 
So to make perf happy we have non-overlapping ranges for each DSO:
 *
 * address ->
 * -------------------------------------------------------------------------------------------------------
 * /tmp/jitted-PID-0.so | (headers) | .text | unwind info | (padding) |
 * /tmp/jitted-PID-1.so                                               | (headers) | .text | unwind info |
 * /tmp/jitted-PID-2.so ...
 * -------------------------------------------------------------------------------------------------------
 *
 * As the trampolines are constant, we add a constant padding but in general
 * the padding needs to have the size of the unwind info rounded to 16 bytes.
 * In general, for our trampolines this is 0x50
 */

#include "Python.h"
#include "pycore_ceval.h"       // _PyPerf_Callbacks
#include "pycore_frame.h"
#include "pycore_interp.h"
#include "pycore_mmap.h"        // _PyAnnotateMemoryMap()
#include "pycore_jit_unwind.h"
#include "pycore_runtime.h"     // _PyRuntime

#ifdef PY_HAVE_PERF_TRAMPOLINE

/* Standard library includes for perf jitdump implementation */
#if defined(__linux__)
#  include <elf.h>          // ELF architecture constants
#endif
#include <fcntl.h>          // File control operations
#include <stdio.h>          // Standard I/O operations
#include <stdlib.h>         // Standard library functions
#include <string.h>         // memcpy, strlen
#include <sys/mman.h>       // Memory mapping functions (mmap)
#include <sys/types.h>      // System data types
#include <time.h>           // clock_gettime (CLOCK_MONOTONIC)
#include <unistd.h>         // System calls (sysconf, getpid)
#include <sys/time.h>       // Time functions (gettimeofday)
#if defined(__linux__)
#  include <sys/syscall.h>  // System call interface
#endif

// =============================================================================
// CONSTANTS AND CONFIGURATION
// =============================================================================

/*
 * Memory layout considerations for perf jitdump:
 *
 * Perf expects non-overlapping memory regions for each JIT-compiled function.
* When perf processes the jitdump file, it creates synthetic DSO (Dynamic * Shared Object) files that contain: * - ELF headers * - .text section (actual machine code) * - Unwind information (for stack traces) * * To ensure proper address space layout, we add padding between code regions. * This prevents address conflicts when perf maps the synthesized DSOs. * * Memory layout example: * /tmp/jitted-PID-0.so: [headers][.text][unwind_info][padding] * /tmp/jitted-PID-1.so: [headers][.text][unwind_info][padding] * * The padding size is now calculated automatically during initialization * based on the actual unwind information requirements. */ /* These constants are defined inside , which we can't use outside of linux. */ #if !defined(__linux__) # if defined(__i386__) || defined(_M_IX86) # define EM_386 3 # elif defined(__arm__) || defined(_M_ARM) # define EM_ARM 40 # elif defined(__x86_64__) || defined(_M_X64) # define EM_X86_64 62 # elif defined(__aarch64__) # define EM_AARCH64 183 # elif defined(__riscv) # define EM_RISCV 243 # endif #endif /* Convenient access to the global trampoline API state */ #define trampoline_api _PyRuntime.ceval.perf.trampoline_api /* Type aliases for clarity and portability */ typedef uint64_t uword; // Word-sized unsigned integer typedef const char* CodeComments; // Code comment strings /* Memory size constants */ #define MB (1024 * 1024) // 1 Megabyte for buffer sizing // ============================================================================= // ARCHITECTURE-SPECIFIC DEFINITIONS // ============================================================================= /* * Returns the ELF machine architecture constant for the current platform. * This is required for the jitdump header to correctly identify the target * architecture for perf processing. 
* */ static uint64_t GetElfMachineArchitecture(void) { #if defined(__x86_64__) || defined(_M_X64) return EM_X86_64; #elif defined(__i386__) || defined(_M_IX86) return EM_386; #elif defined(__aarch64__) return EM_AARCH64; #elif defined(__arm__) || defined(_M_ARM) return EM_ARM; #elif defined(__riscv) return EM_RISCV; #else Py_UNREACHABLE(); // Unsupported architecture - should never reach here return 0; #endif } // ============================================================================= // PERF JITDUMP DATA STRUCTURES // ============================================================================= /* * Perf jitdump file format structures * * These structures define the binary format that perf expects for JIT dump files. * The format is documented in the Linux perf tools source code and must match * exactly for proper perf integration. */ /* * Jitdump file header - written once at the beginning of each jitdump file * Contains metadata about the process and jitdump format version */ typedef struct { uint32_t magic; // Magic number (0x4A695444 = "JiTD") uint32_t version; // Jitdump format version (currently 1) uint32_t size; // Size of this header structure uint32_t elf_mach_target; // Target architecture (from GetElfMachineArchitecture) uint32_t reserved; // Reserved field (must be 0) uint32_t process_id; // Process ID of the JIT compiler uint64_t time_stamp; // Timestamp when jitdump was created uint64_t flags; // Feature flags (currently unused) } Header; /* * Perf event types supported by the jitdump format * Each event type has a corresponding structure format */ enum PerfEvent { PerfLoad = 0, // Code load event (new JIT function) PerfMove = 1, // Code move event (function relocated) PerfDebugInfo = 2, // Debug information event PerfClose = 3, // JIT session close event PerfUnwindingInfo = 4 // Stack unwinding information event }; /* * Base event structure - common header for all perf events * Every event in the jitdump file starts with this structure */ 
struct BaseEvent { uint32_t event; // Event type (from PerfEvent enum) uint32_t size; // Total size of this event including payload uint64_t time_stamp; // Timestamp when event occurred }; /* * Code load event - indicates a new JIT-compiled function is available * This is the most important event type for Python profiling */ typedef struct { struct BaseEvent base; // Common event header uint32_t process_id; // Process ID where code was generated #if defined(__APPLE__) uint64_t thread_id; // Thread ID where code was generated #else uint32_t thread_id; // Thread ID where code was generated #endif uint64_t vma; // Virtual memory address where code is loaded uint64_t code_address; // Address of the actual machine code uint64_t code_size; // Size of the machine code in bytes uint64_t code_id; // Unique identifier for this code region /* Followed by: * - null-terminated function name string * - raw machine code bytes */ } CodeLoadEvent; /* * Code unwinding information event - provides DWARF data for stack traces * Essential for proper stack unwinding during profiling */ typedef struct { struct BaseEvent base; // Common event header uint64_t unwind_data_size; // Size of the unwinding data uint64_t eh_frame_hdr_size; // Size of the EH frame header uint64_t mapped_size; // Total mapped size (with padding) /* Followed by: * - EH frame header * - DWARF unwinding information * - Padding to alignment boundary */ } CodeUnwindingInfoEvent; /* * EH Frame Header structure for DWARF unwinding * * This header provides metadata about the .eh_frame data that follows. * It uses PC-relative and data-relative encodings to keep the synthesized * DSO self-contained when perf injects it. 
*/ typedef struct __attribute__((packed)) { uint8_t version; uint8_t eh_frame_ptr_enc; uint8_t fde_count_enc; uint8_t table_enc; int32_t eh_frame_ptr; uint32_t eh_fde_count; int32_t from; int32_t to; } EhFrameHeader; _Static_assert(sizeof(EhFrameHeader) == 20, "EhFrameHeader layout mismatch"); // ============================================================================= // GLOBAL STATE MANAGEMENT // ============================================================================= /* * Global state for the perf jitdump implementation * * This structure maintains all the state needed for generating jitdump files. * It's designed as a singleton since there's typically only one jitdump file * per Python process. */ typedef struct { FILE* perf_map; // File handle for the jitdump file PyMutex map_lock; // Thread synchronization lock void* mapped_buffer; // Memory-mapped region (signals perf we're active) size_t mapped_size; // Size of the mapped region uint32_t code_id; // Counter for unique code region identifiers uint64_t build_id_salt; // Per-process salt for unique synthetic DSOs } PerfMapJitState; /* Global singleton instance */ static PerfMapJitState perf_jit_map_state; // ============================================================================= // TIME UTILITIES // ============================================================================= /* Time conversion constant */ static const intptr_t nanoseconds_per_second = 1000000000; /* * Get current monotonic time in nanoseconds * * Monotonic time is preferred for event timestamps because it's not affected * by system clock adjustments. This ensures consistent timing relationships * between events even if the system clock is changed. 
* * Returns: Current monotonic time in nanoseconds since an arbitrary epoch */ static int64_t get_current_monotonic_ticks(void) { struct timespec ts; if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { Py_UNREACHABLE(); // Should never fail on supported systems return 0; } /* Convert to nanoseconds for maximum precision */ int64_t result = ts.tv_sec; result *= nanoseconds_per_second; result += ts.tv_nsec; return result; } /* * Get current wall clock time in microseconds * * Used for the jitdump file header timestamp. Unlike monotonic time, * this represents actual wall clock time that can be correlated with * other system events. * * Returns: Current time in microseconds since Unix epoch */ static int64_t get_current_time_microseconds(void) { struct timeval tv; if (gettimeofday(&tv, NULL) < 0) { Py_UNREACHABLE(); // Should never fail on supported systems return 0; } return ((int64_t)(tv.tv_sec) * 1000000) + tv.tv_usec; } // ============================================================================= // FILE I/O UTILITIES // ============================================================================= /* * Write data to the jitdump file with error handling * * This function ensures that all data is written to the file, handling * partial writes that can occur with large buffers or when the system * is under load. * * Args: * buffer: Pointer to data to write * size: Number of bytes to write */ static void perf_map_jit_write_fully(const void* buffer, size_t size) { FILE* out_file = perf_jit_map_state.perf_map; const char* ptr = (const char*)(buffer); while (size > 0) { const size_t written = fwrite(ptr, 1, size, out_file); if (written == 0) { Py_UNREACHABLE(); // Write failure - should be very rare break; } size -= written; ptr += written; } } /* * Write the jitdump file header * * The header must be written exactly once at the beginning of each jitdump * file. It provides metadata that perf uses to parse the rest of the file. 
* * Args: * pid: Process ID to include in the header * out_file: File handle to write to (currently unused, uses global state) */ static void perf_map_jit_write_header(int pid, FILE* out_file) { Header header; /* Initialize header with required values */ header.magic = 0x4A695444; // "JiTD" magic number header.version = 1; // Current jitdump version header.size = sizeof(Header); // Header size for validation header.elf_mach_target = GetElfMachineArchitecture(); // Target architecture header.reserved = 0; // padding reserved for future use header.process_id = pid; // Process identifier header.time_stamp = get_current_time_microseconds(); // Creation time header.flags = 0; // No special flags currently used perf_map_jit_write_fully(&header, sizeof(header)); } // ============================================================================= // JITDUMP INITIALIZATION // ============================================================================= /* * Initialize the perf jitdump interface * * This function sets up everything needed to generate jitdump files: * 1. Creates the jitdump file with a unique name * 2. Maps the first page to signal perf that we're using the interface * 3. Writes the jitdump header * 4. 
Initializes synchronization primitives * * The memory mapping is crucial - perf detects jitdump files by scanning * for processes that have mapped files matching the pattern /tmp/jit-*.dump * * Returns: Pointer to initialized state, or NULL on failure */ static void* perf_map_jit_init(void) { PyMutex_Lock(&perf_jit_map_state.map_lock); if (perf_jit_map_state.perf_map != NULL) { PyMutex_Unlock(&perf_jit_map_state.map_lock); return &perf_jit_map_state; } char filename[100]; int pid = getpid(); /* Create unique filename based on process ID */ snprintf(filename, sizeof(filename) - 1, "/tmp/jit-%d.dump", pid); /* Create/open the jitdump file with appropriate permissions */ const int fd = open(filename, O_CREAT | O_TRUNC | O_RDWR, 0666); if (fd == -1) { PyMutex_Unlock(&perf_jit_map_state.map_lock); return NULL; // Failed to create file } /* Get system page size for memory mapping */ const long page_size = sysconf(_SC_PAGESIZE); if (page_size == -1) { close(fd); PyMutex_Unlock(&perf_jit_map_state.map_lock); return NULL; // Failed to get page size } #if defined(__APPLE__) // On macOS, samply uses a preload to find jitdumps and this mmap can be slow. perf_jit_map_state.mapped_buffer = NULL; #else /* * Map the first page of the jitdump file * * This memory mapping serves as a signal to perf that this process * is generating JIT code. Perf scans /proc/.../maps looking for mapped * files that match the jitdump naming pattern. * * The mapping must be PROT_READ | PROT_EXEC to be detected by perf. 
*/ perf_jit_map_state.mapped_buffer = mmap( NULL, // Let kernel choose address page_size, // Map one page PROT_READ | PROT_EXEC, // Read and execute permissions (required by perf) MAP_PRIVATE, // Private mapping fd, // File descriptor 0 // Offset 0 (first page) ); if (perf_jit_map_state.mapped_buffer == MAP_FAILED) { perf_jit_map_state.mapped_buffer = NULL; close(fd); PyMutex_Unlock(&perf_jit_map_state.map_lock); return NULL; // Memory mapping failed } (void)_PyAnnotateMemoryMap(perf_jit_map_state.mapped_buffer, page_size, "cpython:perf_jit_trampoline"); #endif perf_jit_map_state.mapped_size = page_size; /* Convert file descriptor to FILE* for easier I/O operations */ perf_jit_map_state.perf_map = fdopen(fd, "w+"); if (perf_jit_map_state.perf_map == NULL) { close(fd); PyMutex_Unlock(&perf_jit_map_state.map_lock); return NULL; // Failed to create FILE* } /* * Set up file buffering for better performance * * We use a large buffer (2MB) because jitdump files can be written * frequently during program execution. Buffering reduces system call * overhead and improves overall performance. 
*/ setvbuf(perf_jit_map_state.perf_map, NULL, _IOFBF, 2 * MB); /* Write the jitdump file header */ perf_map_jit_write_header(pid, perf_jit_map_state.perf_map); /* Initialize code ID counter */ perf_jit_map_state.code_id = 0; perf_jit_map_state.build_id_salt = ((uint64_t)pid << 32) ^ (uint64_t)get_current_monotonic_ticks(); /* Calculate padding size based on actual unwind info requirements */ size_t eh_frame_size = _PyJitUnwind_EhFrameSize(0); size_t unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; trampoline_api.code_padding = _Py_SIZE_ROUND_UP(unwind_data_size, 16); trampoline_api.code_alignment = 32; PyMutex_Unlock(&perf_jit_map_state.map_lock); return &perf_jit_map_state; } // ============================================================================= // MAIN JITDUMP ENTRY WRITING // ============================================================================= /* * Write a complete jitdump entry for a code region with a provided name. * * This shares the same implementation as the trampoline callback, but * allows callers that don't have a PyCodeObject to reuse the jitdump * infrastructure. */ static void perf_map_jit_write_entry_with_name( void *state, const void *code_addr, size_t code_size, const char *entry, const char *filename ) { /* Initialize jitdump system on first use */ void* ret = perf_map_jit_init(); if (ret == NULL) { return; // Initialization failed, silently abort } if (entry == NULL) { entry = ""; } if (filename == NULL) { filename = ""; } /* * Create formatted function name for perf display * * Format: "py:::" * The "py::" prefix helps identify Python functions in mixed-language * profiles (e.g., when profiling C extensions alongside Python code). 
*/ size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); if (perf_map_entry == NULL) { return; // Memory allocation failed } snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); const size_t name_length = strlen(perf_map_entry); uword base = (uword)code_addr; uword size = code_size; /* * Generate DWARF unwinding information * * DWARF data is essential for proper stack unwinding during profiling. * Without it, perf cannot generate accurate call graphs, especially * in optimized code where frame pointers may be omitted. */ uint8_t buffer[1024]; // Buffer for DWARF data (1KB should be sufficient) size_t eh_frame_size = _PyJitUnwind_BuildEhFrame( buffer, sizeof(buffer), code_addr, code_size, 0); if (eh_frame_size == 0) { PyMem_RawFree(perf_map_entry); return; } /* * A logical jitdump entry is written as multiple records and also consumes * a process-global code_id. Serialize the whole sequence so concurrent JIT * compilation cannot interleave records or reuse an ID. */ PyMutex_Lock(&perf_jit_map_state.map_lock); /* * Write Code Unwinding Information Event * * This event must be written before the code load event to ensure * perf has the unwinding information available when it processes * the code region. 
*/ CodeUnwindingInfoEvent ev2; ev2.base.event = PerfUnwindingInfo; ev2.base.time_stamp = get_current_monotonic_ticks(); ev2.unwind_data_size = sizeof(EhFrameHeader) + eh_frame_size; /* Verify we don't exceed our padding budget */ assert(ev2.unwind_data_size <= (uint64_t)trampoline_api.code_padding); ev2.eh_frame_hdr_size = sizeof(EhFrameHeader); ev2.mapped_size = _Py_SIZE_ROUND_UP(ev2.unwind_data_size, 16); // 16-byte alignment /* Calculate total event size with padding */ int content_size = (int)(sizeof(ev2) + sizeof(EhFrameHeader) + eh_frame_size); int padding_size = (int)_Py_SIZE_ROUND_UP((size_t)content_size, 8) - content_size; // 8-byte align ev2.base.size = (uint32_t)(content_size + padding_size); /* Write the unwinding info event header */ perf_map_jit_write_fully(&ev2, sizeof(ev2)); /* * Write EH Frame Header * * The EH frame header provides metadata about the DWARF unwinding * information that follows. It includes pointers and counts that * help perf navigate the unwinding data efficiently. 
*/ EhFrameHeader f; f.version = 1; f.eh_frame_ptr_enc = DWRF_EH_PE_sdata4 | DWRF_EH_PE_pcrel; f.fde_count_enc = DWRF_EH_PE_udata4; f.table_enc = DWRF_EH_PE_sdata4 | DWRF_EH_PE_datarel; /* Calculate relative offsets for EH frame navigation */ f.eh_frame_ptr = -(int32_t)(eh_frame_size + 4 * sizeof(unsigned char)); f.eh_fde_count = 1; // We generate exactly one FDE per function f.from = -(int32_t)(_Py_SIZE_ROUND_UP(code_size, 8) + eh_frame_size); uint32_t cie_payload_size; memcpy(&cie_payload_size, buffer, sizeof(cie_payload_size)); int cie_size = (int)(sizeof(cie_payload_size) + cie_payload_size); f.to = -(int32_t)(eh_frame_size - cie_size); /* Write EH frame data and header */ perf_map_jit_write_fully(buffer, eh_frame_size); perf_map_jit_write_fully(&f, sizeof(f)); /* Write padding to maintain alignment */ char padding_bytes[] = "\0\0\0\0\0\0\0\0"; perf_map_jit_write_fully(&padding_bytes, padding_size); /* * Write Code Load Event * * This event tells perf about the new code region. It includes: * - Memory addresses and sizes * - Process and thread identification * - Function name for symbol resolution * - The actual machine code bytes */ CodeLoadEvent ev; ev.base.event = PerfLoad; ev.base.size = sizeof(ev) + (name_length+1) + size; ev.base.time_stamp = get_current_monotonic_ticks(); ev.process_id = getpid(); #if defined(__APPLE__) pthread_threadid_np(NULL, &ev.thread_id); #else ev.thread_id = syscall(SYS_gettid); // Get thread ID via system call #endif ev.vma = base; // Virtual memory address ev.code_address = base; // Same as VMA for our use case ev.code_size = size; /* Assign unique code ID and increment counter */ perf_jit_map_state.code_id += 1; ev.code_id = perf_jit_map_state.code_id; /* Write code load event and associated data */ perf_map_jit_write_fully(&ev, sizeof(ev)); perf_map_jit_write_fully(perf_map_entry, name_length+1); // Include null terminator /* * Ensure each synthetic DSO has unique .text bytes. * * perf merges DSOs that share a build-id. 
Since trampolines can share * identical code and unwind bytes, perf may resolve all JIT frames to * the first symbol it saw (including entries from previous runs when * build-id caching is enabled). Patch a small marker in the emitted * bytes to make the build-id depend on a per-process salt and code id * without modifying the live code. */ uint64_t marker = perf_jit_map_state.build_id_salt ^ ((uint64_t)perf_jit_map_state.code_id << 32) ^ (uint64_t)code_size; if (size >= sizeof(marker)) { size_t prefix = size - sizeof(marker); perf_map_jit_write_fully((void *)(base), prefix); perf_map_jit_write_fully(&marker, sizeof(marker)); } else if (size > 0) { uint8_t tmp[sizeof(marker)]; memcpy(tmp, (void *)(base), size); for (size_t i = 0; i < size; i++) { tmp[i] ^= (uint8_t)(marker >> (i * 8)); } perf_map_jit_write_fully(tmp, size); } /* Clean up allocated memory */ PyMutex_Unlock(&perf_jit_map_state.map_lock); PyMem_RawFree(perf_map_entry); } /* * Write a complete jitdump entry for a Python function * * This is the main function called by Python's trampoline system whenever * a new piece of JIT-compiled code needs to be recorded. It writes both * the unwinding information and the code load event to the jitdump file. * * The function performs these steps: * 1. Initialize jitdump system if not already done * 2. Extract function name and filename from Python code object * 3. Generate DWARF unwinding information * 4. Write unwinding info event to jitdump file * 5. Write code load event to jitdump file * * Args: * state: Jitdump state (currently unused, uses global state) * code_addr: Address where the compiled code resides * code_size: Size of the compiled code in bytes * co: Python code object containing metadata * * IMPORTANT: This function signature is part of Python's internal API * and must not be changed without coordinating with core Python development. 
*/ static void perf_map_jit_write_entry(void *state, const void *code_addr, size_t code_size, PyCodeObject *co) { const char *entry = ""; const char *filename = ""; if (co != NULL) { if (co->co_qualname != NULL) { entry = PyUnicode_AsUTF8(co->co_qualname); } if (co->co_filename != NULL) { filename = PyUnicode_AsUTF8(co->co_filename); } } perf_map_jit_write_entry_with_name(state, code_addr, code_size, entry, filename); } void _PyPerfJit_WriteNamedCode(const void *code_addr, size_t code_size, const char *entry, const char *filename) { perf_map_jit_write_entry_with_name( NULL, code_addr, code_size, entry, filename); } // ============================================================================= // CLEANUP AND FINALIZATION // ============================================================================= /* * Finalize and cleanup the perf jitdump system * * This function is called when Python is shutting down or when the * perf trampoline system is being disabled. It ensures all resources * are properly released and all buffered data is flushed to disk. * * Args: * state: Jitdump state (currently unused, uses global state) * * Returns: 0 on success * * IMPORTANT: This function signature is part of Python's internal API * and must not be changed without coordinating with core Python development. */ static int perf_map_jit_fini(void* state) { /* * Close jitdump file with proper synchronization * * We need to acquire the lock to ensure no other threads are * writing to the file when we close it. This prevents corruption * and ensures all data is properly flushed. */ PyMutex_Lock(&perf_jit_map_state.map_lock); if (perf_jit_map_state.perf_map != NULL) { fclose(perf_jit_map_state.perf_map); // This also flushes buffers perf_jit_map_state.perf_map = NULL; } PyMutex_Unlock(&perf_jit_map_state.map_lock); /* * Unmap the memory region * * This removes the signal to perf that we were generating JIT code. 
* After this point, perf will no longer detect this process as * having JIT capabilities. */ if (perf_jit_map_state.mapped_buffer != NULL) { munmap(perf_jit_map_state.mapped_buffer, perf_jit_map_state.mapped_size); perf_jit_map_state.mapped_buffer = NULL; } /* Clear global state reference */ trampoline_api.state = NULL; return 0; // Success } // ============================================================================= // PUBLIC API EXPORT // ============================================================================= /* * Python Perf Callbacks Structure * * This structure defines the callback interface that Python's trampoline * system uses to integrate with perf profiling. It contains function * pointers for initialization, event writing, and cleanup. * * CRITICAL: This structure and its contents are part of Python's internal * API. The function signatures and behavior must remain stable to maintain * compatibility with the Python interpreter's perf integration system. * * Used by: Python's _PyPerf_Callbacks system in pycore_ceval.h */ _PyPerf_Callbacks _Py_perfmap_jit_callbacks = { &perf_map_jit_init, // Initialization function &perf_map_jit_write_entry, // Event writing function &perf_map_jit_fini, // Cleanup function }; #endif /* PY_HAVE_PERF_TRAMPOLINE */