| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | /*
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Perf trampoline instrumentation | 
					
						
							|  |  |  | =============================== | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This file contains instrumentation that allows us to associate | 
					
						
							|  |  |  | calls to the CPython eval loop back to the names of the Python | 
					
						
							|  |  |  | functions and filename being executed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Many native performance profilers like the Linux perf tools are | 
					
						
							|  |  |  | only able to 'see' the C stack when sampling from the profiled | 
					
						
							|  |  |  | process. This means that if we have the following python code: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     import time | 
					
						
							|  |  |  |     def foo(n): | 
					
						
							|  |  |  |         # Some CPU intensive code
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def bar(n): | 
					
						
							|  |  |  |         foo(n) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def baz(n): | 
					
						
							|  |  |  |         bar(n) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     baz(10000000) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | A performance profiler that is only able to see native frames will | 
					
						
							|  |  |  | produce the following backtrace when sampling from foo(): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault -----> Evaluation frame of foo() | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault ------> Evaluation frame of bar() | 
					
						
							|  |  |  |     _PyEval_EvalFrame | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault -------> Evaluation frame of baz() | 
					
						
							|  |  |  |     _PyEval_EvalFrame | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_RunMain | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Because the profiler is only able to see the native frames and the native | 
					
						
							|  |  |  | function that runs the evaluation loop is the same (_PyEval_EvalFrameDefault) | 
					
						
							|  |  |  | then the profiler and any reporter generated by it will not be able to | 
					
						
							|  |  |  | associate the names of the Python functions and the filenames associated with | 
					
						
							|  |  |  | those calls, rendering the results useless in the Python world. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | To fix this problem, we introduce the concept of a trampoline frame. A | 
					
						
							|  |  |  | trampoline frame is a piece of code that is unique per Python code object that | 
					
						
							|  |  |  | is executed before entering the CPython eval loop. This piece of code just | 
					
						
							|  |  |  | calls the original Python evaluation function (_PyEval_EvalFrameDefault) and | 
					
						
							|  |  |  | forwards all the arguments received. In this way, when a profiler samples | 
					
						
							|  |  |  | frames from the previous example it will see: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault -----> Evaluation frame of foo() | 
					
						
							|  |  |  |     [Jit compiled code 3] | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault ------> Evaluation frame of bar() | 
					
						
							|  |  |  |     [Jit compiled code 2] | 
					
						
							|  |  |  |     _PyEval_EvalFrame | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _PyEval_EvalFrameDefault -------> Evaluation frame of baz() | 
					
						
							|  |  |  |     [Jit compiled code 1] | 
					
						
							|  |  |  |     _PyEval_EvalFrame | 
					
						
							|  |  |  |     _PyEval_Vector | 
					
						
							|  |  |  |     _PyFunction_Vectorcall | 
					
						
							|  |  |  |     PyObject_Vectorcall | 
					
						
							|  |  |  |     call_function | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ... | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_RunMain | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | When we generate every unique copy of the trampoline (what here we called "[Jit | 
					
						
							|  |  |  | compiled code N]") we write the relationship between the compiled code and the | 
					
						
							|  |  |  | Python function that is associated with it. Every profiler requires this | 
					
						
							|  |  |  | information in a different format. For example, the Linux "perf" profiler | 
					
						
							|  |  |  | requires a file in "/tmp/perf-PID.map" (name and location not configurable) | 
					
						
							|  |  |  | with the following format: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     <compiled code address> <compiled code size> <name of the compiled code> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | If this file is available when "perf" generates reports, it will automatically | 
					
						
							|  |  |  | associate every trampoline with the Python function that it is associated with | 
					
						
							|  |  |  | allowing it to generate reports that include Python information. These reports | 
					
						
							|  |  |  | then can also be filtered in a way that *only* Python information appears. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Notice that for this to work, there must be a unique copy of the trampoline | 
					
						
							|  |  |  | per Python code object even if the code in the trampoline is the same. To | 
					
						
							|  |  |  | achieve this we have an assembly template in Objects/asm_trampoline.S that is | 
					
						
							|  |  |  | compiled into the Python executable/shared library. This template generates a | 
					
						
							|  |  |  | symbol that marks the start of the assembly code and another that marks the end | 
					
						
							|  |  |  | of the assembly code for the trampoline.  Then, every time we need a unique | 
					
						
							|  |  |  | trampoline for a Python code object, we copy the assembly code into a mmaped | 
					
						
							|  |  |  | area that has executable permissions and we return the start of that area as | 
					
						
							|  |  |  | our trampoline function. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Asking for a mmap-ed memory area for every trampoline is very wasteful so we | 
					
						
							|  |  |  | allocate big arenas of memory in a single mmap call, we populate the entire | 
					
						
							|  |  |  | arena with copies of the trampoline (this allows us to not have to invalidate | 
					
						
							|  |  |  | the icache for the instructions in the page) and then we return the next | 
					
						
							|  |  |  | available chunk every time someone asks for a new trampoline. We keep a linked | 
					
						
							|  |  |  | list of arenas in case the current memory arena is exhausted and another one is | 
					
						
							|  |  |  | needed. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | For the best results, Python should be compiled with | 
					
						
							|  |  |  | CFLAGS="-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer" as this allows | 
					
						
							|  |  |  | profilers to unwind using only the frame pointer and not on DWARF debug | 
					
						
							|  |  |  | information (note that as trampolines are dynamically generated there won't be | 
					
						
							|  |  |  | any DWARF information available for them). | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include "Python.h"
 | 
					
						
							| 
									
										
										
										
											2023-09-06 15:56:08 +02:00
										 |  |  | #include "pycore_ceval.h"         // _PyPerf_Callbacks
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | #include "pycore_frame.h"
 | 
					
						
							|  |  |  | #include "pycore_interp.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <fcntl.h>
 | 
					
						
							|  |  |  | #include <stdio.h>
 | 
					
						
							|  |  |  | #include <stdlib.h>
 | 
					
						
							| 
									
										
										
										
											2023-09-02 16:50:18 +02:00
										 |  |  | #include <sys/mman.h>             // mmap()
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | #include <sys/types.h>
 | 
					
						
							| 
									
										
										
										
											2023-09-02 16:50:18 +02:00
										 |  |  | #include <unistd.h>               // sysconf()
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #include <sys/time.h>           // gettimeofday()
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-08 12:04:41 +01:00
										 |  |  | #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__)
 | 
					
						
							|  |  |  | #define PY_HAVE_INVALIDATE_ICACHE
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #if defined(__clang__) || defined(__GNUC__)
 | 
					
						
							|  |  |  | extern void __clear_cache(void *, void*); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void invalidate_icache(char* begin, char*end) { | 
					
						
							|  |  |  | #if defined(__clang__) || defined(__GNUC__)
 | 
					
						
							|  |  |  |     return __clear_cache(begin, end); | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | /* The function pointer is passed as last argument. The other three arguments
 | 
					
						
							|  |  |  |  * are passed in the same order as the function requires. This results in | 
					
						
							|  |  |  |  * shorter, more efficient ASM code for trampoline. | 
					
						
							|  |  |  |  */ | 
					
						
							|  |  |  | typedef PyObject *(*py_evaluator)(PyThreadState *, _PyInterpreterFrame *, | 
					
						
							|  |  |  |                                   int throwflag); | 
					
						
							|  |  |  | typedef PyObject *(*py_trampoline)(PyThreadState *, _PyInterpreterFrame *, int, | 
					
						
							|  |  |  |                                    py_evaluator); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | extern void *_Py_trampoline_func_start;  // Start of the template of the
 | 
					
						
							|  |  |  |                                          // assembly trampoline
 | 
					
						
							|  |  |  | extern void * | 
					
						
							|  |  |  |     _Py_trampoline_func_end;  // End of the template of the assembly trampoline
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | struct code_arena_st { | 
					
						
							|  |  |  |     char *start_addr;    // Start of the memory arena
 | 
					
						
							|  |  |  |     char *current_addr;  // Address of the current trampoline within the arena
 | 
					
						
							|  |  |  |     size_t size;         // Size of the memory arena
 | 
					
						
							|  |  |  |     size_t size_left;    // Remaining size of the memory arena
 | 
					
						
							|  |  |  |     size_t code_size;    // Size of the code of every trampoline in the arena
 | 
					
						
							|  |  |  |     struct code_arena_st | 
					
						
							|  |  |  |         *prev;  // Pointer to the arena  or NULL if this is the first arena.
 | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct code_arena_st code_arena_t; | 
					
						
							|  |  |  | typedef struct trampoline_api_st trampoline_api_t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | enum perf_trampoline_type { | 
					
						
							|  |  |  |     PERF_TRAMPOLINE_UNSET = 0, | 
					
						
							|  |  |  |     PERF_TRAMPOLINE_TYPE_MAP = 1, | 
					
						
							|  |  |  |     PERF_TRAMPOLINE_TYPE_JITDUMP = 2, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  | #define perf_status _PyRuntime.ceval.perf.status
 | 
					
						
							|  |  |  | #define extra_code_index _PyRuntime.ceval.perf.extra_code_index
 | 
					
						
							|  |  |  | #define perf_code_arena _PyRuntime.ceval.perf.code_arena
 | 
					
						
							|  |  |  | #define trampoline_api _PyRuntime.ceval.perf.trampoline_api
 | 
					
						
							|  |  |  | #define perf_map_file _PyRuntime.ceval.perf.map_file
 | 
					
						
							| 
									
										
										
										
											2023-10-26 20:57:29 -07:00
										 |  |  | #define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | #define perf_trampoline_type _PyRuntime.ceval.perf.perf_trampoline_type
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | perf_map_write_entry(void *state, const void *code_addr, | 
					
						
							|  |  |  |                          unsigned int code_size, PyCodeObject *co) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-05-21 03:12:24 -07:00
										 |  |  |     const char *entry = ""; | 
					
						
							|  |  |  |     if (co->co_qualname != NULL) { | 
					
						
							|  |  |  |         entry = PyUnicode_AsUTF8(co->co_qualname); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-21 03:12:24 -07:00
										 |  |  |     const char *filename = ""; | 
					
						
							|  |  |  |     if (co->co_filename != NULL) { | 
					
						
							|  |  |  |         filename = PyUnicode_AsUTF8(co->co_filename); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     size_t perf_map_entry_size = snprintf(NULL, 0, "py::%s:%s", entry, filename) + 1; | 
					
						
							|  |  |  |     char* perf_map_entry = (char*) PyMem_RawMalloc(perf_map_entry_size); | 
					
						
							|  |  |  |     if (perf_map_entry == NULL) { | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-05-21 03:12:24 -07:00
										 |  |  |     snprintf(perf_map_entry, perf_map_entry_size, "py::%s:%s", entry, filename); | 
					
						
							|  |  |  |     PyUnstable_WritePerfMapEntry(code_addr, code_size, perf_map_entry); | 
					
						
							|  |  |  |     PyMem_RawFree(perf_map_entry); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  | static void* | 
					
						
							|  |  |  | perf_map_init_state(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PyUnstable_PerfMapState_Init(); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     trampoline_api.code_padding = 0; | 
					
						
							|  |  |  |     perf_trampoline_type = PERF_TRAMPOLINE_TYPE_MAP; | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | perf_map_free_state(void *state) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PyUnstable_PerfMapState_Fini(); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | _PyPerf_Callbacks _Py_perfmap_callbacks = { | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     &perf_map_init_state, | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     &perf_map_write_entry, | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     &perf_map_free_state, | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | static size_t round_up(int64_t value, int64_t multiple) { | 
					
						
							|  |  |  |     if (multiple == 0) { | 
					
						
							|  |  |  |         // Avoid division by zero
 | 
					
						
							|  |  |  |         return value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int64_t remainder = value % multiple; | 
					
						
							|  |  |  |     if (remainder == 0) { | 
					
						
							|  |  |  |         // Value is already a multiple of 'multiple'
 | 
					
						
							|  |  |  |         return value; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Calculate the difference to the next multiple
 | 
					
						
							|  |  |  |     int64_t difference = multiple - remainder; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Add the difference to the value
 | 
					
						
							|  |  |  |     int64_t rounded_up_value = value + difference; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return rounded_up_value; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TRAMPOLINE MANAGEMENT API
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | static int | 
					
						
							|  |  |  | new_code_arena(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     // non-trivial programs typically need 64 to 256 kiB.
 | 
					
						
							|  |  |  |     size_t mem_size = 4096 * 16; | 
					
						
							|  |  |  |     assert(mem_size % sysconf(_SC_PAGESIZE) == 0); | 
					
						
							|  |  |  |     char *memory = | 
					
						
							|  |  |  |         mmap(NULL,  // address
 | 
					
						
							|  |  |  |              mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, | 
					
						
							|  |  |  |              -1,  // fd (not used here)
 | 
					
						
							|  |  |  |              0);  // offset (not used here)
 | 
					
						
							| 
									
										
										
										
											2023-12-21 11:28:55 -08:00
										 |  |  |     if (memory == MAP_FAILED) { | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         PyErr_SetFromErrno(PyExc_OSError); | 
					
						
							| 
									
										
										
										
											2023-11-02 11:16:34 +02:00
										 |  |  |         PyErr_FormatUnraisable("Failed to create new mmap for perf trampoline"); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         perf_status = PERF_STATUS_FAILED; | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     void *start = &_Py_trampoline_func_start; | 
					
						
							|  |  |  |     void *end = &_Py_trampoline_func_end; | 
					
						
							|  |  |  |     size_t code_size = end - start; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     size_t chunk_size = round_up(code_size + trampoline_api.code_padding, 16); | 
					
						
							| 
									
										
										
										
											2022-08-30 19:37:22 +01:00
										 |  |  |     // TODO: Check the effect of alignment of the code chunks. Initial investigation
 | 
					
						
							|  |  |  |     // showed that this has no effect on performance in x86-64 or aarch64 and the current
 | 
					
						
							|  |  |  |     // version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
 | 
					
						
							|  |  |  |     //
 | 
					
						
							|  |  |  |     // We should check the values in the future and see if there is a
 | 
					
						
							|  |  |  |     // measurable performance improvement by rounding trampolines up to 32-bit
 | 
					
						
							|  |  |  |     // or 64-bit alignment.
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     size_t n_copies = mem_size / chunk_size; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     for (size_t i = 0; i < n_copies; i++) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         memcpy(memory + i * chunk_size, start, code_size * sizeof(char)); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     // Some systems may prevent us from creating executable code on the fly.
 | 
					
						
							|  |  |  |     int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); | 
					
						
							|  |  |  |     if (res == -1) { | 
					
						
							|  |  |  |         PyErr_SetFromErrno(PyExc_OSError); | 
					
						
							|  |  |  |         munmap(memory, mem_size); | 
					
						
							| 
									
										
										
										
											2023-11-02 11:16:34 +02:00
										 |  |  |         PyErr_FormatUnraisable("Failed to set mmap for perf trampoline to " | 
					
						
							|  |  |  |                                "PROT_READ | PROT_EXEC"); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-08 12:04:41 +01:00
										 |  |  | #ifdef PY_HAVE_INVALIDATE_ICACHE
 | 
					
						
							|  |  |  |     // Before the JIT can run a block of code that has been emitted it must invalidate
 | 
					
						
							|  |  |  |     // the instruction cache on some platforms like arm and aarch64.
 | 
					
						
							|  |  |  |     invalidate_icache(memory, memory + mem_size); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     code_arena_t *new_arena = PyMem_RawCalloc(1, sizeof(code_arena_t)); | 
					
						
							|  |  |  |     if (new_arena == NULL) { | 
					
						
							|  |  |  |         PyErr_NoMemory(); | 
					
						
							|  |  |  |         munmap(memory, mem_size); | 
					
						
							| 
									
										
										
										
											2023-11-02 11:16:34 +02:00
										 |  |  |         PyErr_FormatUnraisable("Failed to allocate new code arena struct for perf trampoline"); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     new_arena->start_addr = memory; | 
					
						
							|  |  |  |     new_arena->current_addr = memory; | 
					
						
							|  |  |  |     new_arena->size = mem_size; | 
					
						
							|  |  |  |     new_arena->size_left = mem_size; | 
					
						
							|  |  |  |     new_arena->code_size = code_size; | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |     new_arena->prev = perf_code_arena; | 
					
						
							|  |  |  |     perf_code_arena = new_arena; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | free_code_arenas(void) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |     code_arena_t *cur = perf_code_arena; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     code_arena_t *prev; | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |     perf_code_arena = NULL;  // invalid static pointer
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     while (cur) { | 
					
						
							|  |  |  |         munmap(cur->start_addr, cur->size); | 
					
						
							|  |  |  |         prev = cur->prev; | 
					
						
							|  |  |  |         PyMem_RawFree(cur); | 
					
						
							|  |  |  |         cur = prev; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline py_trampoline | 
					
						
							|  |  |  | code_arena_new_code(code_arena_t *code_arena) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     py_trampoline trampoline = (py_trampoline)code_arena->current_addr; | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     size_t total_code_size = round_up(code_arena->code_size + trampoline_api.code_padding, 16); | 
					
						
							|  |  |  |     code_arena->size_left -= total_code_size; | 
					
						
							|  |  |  |     code_arena->current_addr += total_code_size; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     return trampoline; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline py_trampoline | 
					
						
							|  |  |  | compile_trampoline(void) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |     size_t total_code_size = round_up(perf_code_arena->code_size + trampoline_api.code_padding, 16); | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |     if ((perf_code_arena == NULL) || | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         (perf_code_arena->size_left <= total_code_size)) { | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         if (new_code_arena() < 0) { | 
					
						
							|  |  |  |             return NULL; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |     assert(perf_code_arena->size_left <= perf_code_arena->size); | 
					
						
							|  |  |  |     return code_arena_new_code(perf_code_arena); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, | 
					
						
							|  |  |  |                         int throw) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (perf_status == PERF_STATUS_FAILED || | 
					
						
							|  |  |  |         perf_status == PERF_STATUS_NO_INIT) { | 
					
						
							|  |  |  |         goto default_eval; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-14 13:46:37 +01:00
										 |  |  |     PyCodeObject *co = _PyFrame_GetCode(frame); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     py_trampoline f = NULL; | 
					
						
							|  |  |  |     assert(extra_code_index != -1); | 
					
						
							|  |  |  |     int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); | 
					
						
							|  |  |  |     if (ret != 0 || f == NULL) { | 
					
						
							|  |  |  |         // This is the first time we see this code object so we need
 | 
					
						
							|  |  |  |         // to compile a trampoline for it.
 | 
					
						
							|  |  |  |         py_trampoline new_trampoline = compile_trampoline(); | 
					
						
							|  |  |  |         if (new_trampoline == NULL) { | 
					
						
							|  |  |  |             goto default_eval; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         trampoline_api.write_state(trampoline_api.state, new_trampoline, | 
					
						
							| 
									
										
										
										
											2022-12-08 17:17:20 -07:00
										 |  |  |                                    perf_code_arena->code_size, co); | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         _PyCode_SetExtra((PyObject *)co, extra_code_index, | 
					
						
							|  |  |  |                          (void *)new_trampoline); | 
					
						
							|  |  |  |         f = new_trampoline; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     assert(f != NULL); | 
					
						
							|  |  |  |     return f(ts, frame, throw, _PyEval_EvalFrameDefault); | 
					
						
							|  |  |  | default_eval: | 
					
						
							|  |  |  |     // Something failed, fall back to the default evaluator.
 | 
					
						
							|  |  |  |     return _PyEval_EvalFrameDefault(ts, frame, throw); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | #endif  // PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-26 20:57:29 -07:00
										 |  |  | int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     py_trampoline f = NULL; | 
					
						
							|  |  |  |     assert(extra_code_index != -1); | 
					
						
							|  |  |  |     int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); | 
					
						
							|  |  |  |     if (ret != 0 || f == NULL) { | 
					
						
							|  |  |  |         py_trampoline new_trampoline = compile_trampoline(); | 
					
						
							|  |  |  |         if (new_trampoline == NULL) { | 
					
						
							|  |  |  |             return 0; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         trampoline_api.write_state(trampoline_api.state, new_trampoline, | 
					
						
							|  |  |  |                                    perf_code_arena->code_size, co); | 
					
						
							|  |  |  |         return _PyCode_SetExtra((PyObject *)co, extra_code_index, | 
					
						
							|  |  |  |                          (void *)new_trampoline); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #endif // PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | int | 
					
						
							|  |  |  | _PyIsPerfTrampolineActive(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     PyThreadState *tstate = _PyThreadState_GET(); | 
					
						
							|  |  |  |     return tstate->interp->eval_frame == py_trampoline_evaluator; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | _PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *callbacks) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (callbacks == NULL) { | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     callbacks->init_state = trampoline_api.init_state; | 
					
						
							|  |  |  |     callbacks->write_state = trampoline_api.write_state; | 
					
						
							|  |  |  |     callbacks->free_state = trampoline_api.free_state; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (callbacks == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     if (trampoline_api.state) { | 
					
						
							|  |  |  |         _PyPerfTrampoline_Fini(); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     trampoline_api.init_state = callbacks->init_state; | 
					
						
							|  |  |  |     trampoline_api.write_state = callbacks->write_state; | 
					
						
							|  |  |  |     trampoline_api.free_state = callbacks->free_state; | 
					
						
							|  |  |  |     trampoline_api.state = NULL; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | _PyPerfTrampoline_Init(int activate) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     PyThreadState *tstate = _PyThreadState_GET(); | 
					
						
							|  |  |  |     if (tstate->interp->eval_frame && | 
					
						
							|  |  |  |         tstate->interp->eval_frame != py_trampoline_evaluator) { | 
					
						
							|  |  |  |         PyErr_SetString(PyExc_RuntimeError, | 
					
						
							|  |  |  |                         "Trampoline cannot be initialized as a custom eval " | 
					
						
							|  |  |  |                         "frame is already present"); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (!activate) { | 
					
						
							|  |  |  |         tstate->interp->eval_frame = NULL; | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |         perf_status = PERF_STATUS_NO_INIT; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  |         tstate->interp->eval_frame = py_trampoline_evaluator; | 
					
						
							|  |  |  |         if (new_code_arena() < 0) { | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         extra_code_index = _PyEval_RequestCodeExtraIndex(NULL); | 
					
						
							|  |  |  |         if (extra_code_index == -1) { | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |         if (trampoline_api.state == NULL && trampoline_api.init_state != NULL) { | 
					
						
							|  |  |  |             trampoline_api.state = trampoline_api.init_state(); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |         perf_status = PERF_STATUS_OK; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | _PyPerfTrampoline_Fini(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     if (perf_status != PERF_STATUS_OK) { | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     PyThreadState *tstate = _PyThreadState_GET(); | 
					
						
							|  |  |  |     if (tstate->interp->eval_frame == py_trampoline_evaluator) { | 
					
						
							|  |  |  |         tstate->interp->eval_frame = NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     if (perf_status == PERF_STATUS_OK) { | 
					
						
							|  |  |  |         trampoline_api.free_state(trampoline_api.state); | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         perf_trampoline_type = PERF_TRAMPOLINE_UNSET; | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     extra_code_index = -1; | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |     perf_status = PERF_STATUS_NO_INIT; | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | #endif
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  | void _PyPerfTrampoline_FreeArenas(void) { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     free_code_arenas(); | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-26 20:57:29 -07:00
										 |  |  | int | 
					
						
							|  |  |  | PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){ | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							|  |  |  |     persist_after_fork = enable; | 
					
						
							|  |  |  |     return persist_after_fork; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  | PyStatus | 
					
						
							|  |  |  | _PyPerfTrampoline_AfterFork_Child(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef PY_HAVE_PERF_TRAMPOLINE
 | 
					
						
							| 
									
										
										
										
											2023-10-26 20:57:29 -07:00
										 |  |  |     if (persist_after_fork) { | 
					
						
							| 
									
										
										
										
											2024-05-05 03:07:29 +02:00
										 |  |  |         if (perf_trampoline_type != PERF_TRAMPOLINE_TYPE_MAP) { | 
					
						
							|  |  |  |             return PyStatus_Error("Failed to copy perf map file as perf trampoline type is not type map."); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-12-01 13:20:51 +00:00
										 |  |  |         _PyPerfTrampoline_Fini(); | 
					
						
							| 
									
										
										
										
											2023-10-26 20:57:29 -07:00
										 |  |  |         char filename[256]; | 
					
						
							|  |  |  |         pid_t parent_pid = getppid(); | 
					
						
							|  |  |  |         snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid); | 
					
						
							|  |  |  |         if (PyUnstable_CopyPerfMapFile(filename) != 0) { | 
					
						
							|  |  |  |             return PyStatus_Error("Failed to copy perf map file."); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         // Restart trampoline in file in child.
 | 
					
						
							|  |  |  |         int was_active = _PyIsPerfTrampolineActive(); | 
					
						
							|  |  |  |         _PyPerfTrampoline_Fini(); | 
					
						
							|  |  |  |         if (was_active) { | 
					
						
							|  |  |  |             _PyPerfTrampoline_Init(1); | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-08-30 18:11:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     return PyStatus_Ok(); | 
					
						
							|  |  |  | } |