| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | #include "Python.h"
 | 
					
						
							|  |  |  | #include "opcode.h"
 | 
					
						
							|  |  |  | #include "pycore_interp.h"
 | 
					
						
							|  |  |  | #include "pycore_opcode.h"
 | 
					
						
							| 
									
										
										
										
											2023-06-26 19:02:57 -07:00
										 |  |  | #include "opcode_metadata.h"
 | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | #include "pycore_pystate.h"
 | 
					
						
							| 
									
										
										
										
											2023-06-26 19:02:57 -07:00
										 |  |  | #include "pycore_uops.h"
 | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | #include "cpython/optimizer.h"
 | 
					
						
							|  |  |  | #include <stdbool.h>
 | 
					
						
							|  |  |  | #include <stdint.h>
 | 
					
						
							|  |  |  | #include <stddef.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  | static bool | 
					
						
							|  |  |  | has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (instr->op.code == ENTER_EXECUTOR) { | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (code->co_executors == NULL) { | 
					
						
							|  |  |  |         return true; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return code->co_executors->size < 256; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | static int32_t | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  | get_index_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr) | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     if (instr->op.code == ENTER_EXECUTOR) { | 
					
						
							|  |  |  |         return instr->op.arg; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     _PyExecutorArray *old = code->co_executors; | 
					
						
							|  |  |  |     int size = 0; | 
					
						
							|  |  |  |     int capacity = 0; | 
					
						
							|  |  |  |     if (old != NULL) { | 
					
						
							|  |  |  |         size = old->size; | 
					
						
							|  |  |  |         capacity = old->capacity; | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |         assert(size < 256); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     assert(size <= capacity); | 
					
						
							|  |  |  |     if (size == capacity) { | 
					
						
							|  |  |  |         /* Array is full. Grow array */ | 
					
						
							|  |  |  |         int new_capacity = capacity ? capacity * 2 : 4; | 
					
						
							|  |  |  |         _PyExecutorArray *new = PyMem_Realloc( | 
					
						
							|  |  |  |             old, | 
					
						
							|  |  |  |             offsetof(_PyExecutorArray, executors) + | 
					
						
							|  |  |  |             new_capacity * sizeof(_PyExecutorObject *)); | 
					
						
							|  |  |  |         if (new == NULL) { | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         new->capacity = new_capacity; | 
					
						
							|  |  |  |         new->size = size; | 
					
						
							|  |  |  |         code->co_executors = new; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     assert(size < code->co_executors->capacity); | 
					
						
							|  |  |  |     return size; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorObject *executor) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     Py_INCREF(executor); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     if (instr->op.code == ENTER_EXECUTOR) { | 
					
						
							|  |  |  |         assert(index == instr->op.arg); | 
					
						
							|  |  |  |         _PyExecutorObject *old = code->co_executors->executors[index]; | 
					
						
							|  |  |  |         executor->vm_data.opcode = old->vm_data.opcode; | 
					
						
							|  |  |  |         executor->vm_data.oparg = old->vm_data.oparg; | 
					
						
							|  |  |  |         old->vm_data.opcode = 0; | 
					
						
							|  |  |  |         code->co_executors->executors[index] = executor; | 
					
						
							|  |  |  |         Py_DECREF(old); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |         assert(code->co_executors->size == index); | 
					
						
							|  |  |  |         assert(code->co_executors->capacity > index); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |         executor->vm_data.opcode = instr->op.code; | 
					
						
							|  |  |  |         executor->vm_data.oparg = instr->op.arg; | 
					
						
							|  |  |  |         code->co_executors->executors[index] = executor; | 
					
						
							|  |  |  |         assert(index < 256); | 
					
						
							|  |  |  |         instr->op.code = ENTER_EXECUTOR; | 
					
						
							|  |  |  |         instr->op.arg = index; | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |         code->co_executors->size++; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     return; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | PyUnstable_Replace_Executor(PyCodeObject *code, _Py_CODEUNIT *instr, _PyExecutorObject *new) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (instr->op.code != ENTER_EXECUTOR) { | 
					
						
							|  |  |  |         PyErr_Format(PyExc_ValueError, "No executor to replace"); | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     int index = instr->op.arg; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     assert(index >= 0); | 
					
						
							|  |  |  |     insert_executor(code, instr, index, new); | 
					
						
							|  |  |  |     return 0; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  | static int | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | error_optimize( | 
					
						
							|  |  |  |     _PyOptimizerObject* self, | 
					
						
							|  |  |  |     PyCodeObject *code, | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |     _Py_CODEUNIT *instr, | 
					
						
							|  |  |  |     _PyExecutorObject **exec) | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | { | 
					
						
							|  |  |  |     PyErr_Format(PyExc_SystemError, "Should never call error_optimize"); | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |     return -1; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyTypeObject DefaultOptimizer_Type = { | 
					
						
							|  |  |  |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
					
						
							|  |  |  |     .tp_name = "noop_optimizer", | 
					
						
							|  |  |  |     .tp_basicsize = sizeof(_PyOptimizerObject), | 
					
						
							|  |  |  |     .tp_itemsize = 0, | 
					
						
							|  |  |  |     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _PyOptimizerObject _PyOptimizer_Default = { | 
					
						
							|  |  |  |     PyObject_HEAD_INIT(&DefaultOptimizer_Type) | 
					
						
							|  |  |  |     .optimize = error_optimize, | 
					
						
							|  |  |  |     .resume_threshold = UINT16_MAX, | 
					
						
							|  |  |  |     .backedge_threshold = UINT16_MAX, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _PyOptimizerObject * | 
					
						
							|  |  |  | PyUnstable_GetOptimizer(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PyInterpreterState *interp = PyInterpreterState_Get(); | 
					
						
							|  |  |  |     if (interp->optimizer == &_PyOptimizer_Default) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     assert(interp->optimizer_backedge_threshold == interp->optimizer->backedge_threshold); | 
					
						
							|  |  |  |     assert(interp->optimizer_resume_threshold == interp->optimizer->resume_threshold); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     Py_INCREF(interp->optimizer); | 
					
						
							|  |  |  |     return interp->optimizer; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     PyInterpreterState *interp = PyInterpreterState_Get(); | 
					
						
							|  |  |  |     if (optimizer == NULL) { | 
					
						
							|  |  |  |         optimizer = &_PyOptimizer_Default; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     _PyOptimizerObject *old = interp->optimizer; | 
					
						
							|  |  |  |     Py_INCREF(optimizer); | 
					
						
							|  |  |  |     interp->optimizer = optimizer; | 
					
						
							|  |  |  |     interp->optimizer_backedge_threshold = optimizer->backedge_threshold; | 
					
						
							|  |  |  |     interp->optimizer_resume_threshold = optimizer->resume_threshold; | 
					
						
							|  |  |  |     Py_DECREF(old); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | _PyInterpreterFrame * | 
					
						
							|  |  |  | _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNIT *dest, PyObject **stack_pointer) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2023-06-14 13:46:37 +01:00
										 |  |  |     PyCodeObject *code = (PyCodeObject *)frame->f_executable; | 
					
						
							|  |  |  |     assert(PyCode_Check(code)); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     PyInterpreterState *interp = PyInterpreterState_Get(); | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     if (!has_space_for_executor(code, src)) { | 
					
						
							|  |  |  |         goto jump_to_destination; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     _PyOptimizerObject *opt = interp->optimizer; | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     _PyExecutorObject *executor = NULL; | 
					
						
							| 
									
										
										
										
											2023-06-14 13:46:37 +01:00
										 |  |  |     int err = opt->optimize(opt, code, dest, &executor); | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |     if (err <= 0) { | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |         assert(executor == NULL); | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |         if (err < 0) { | 
					
						
							|  |  |  |             return NULL; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |         goto jump_to_destination; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     int index = get_index_for_executor(code, src); | 
					
						
							|  |  |  |     if (index < 0) { | 
					
						
							|  |  |  |         /* Out of memory. Don't raise and assume that the
 | 
					
						
							|  |  |  |          * error will show up elsewhere. | 
					
						
							|  |  |  |          * | 
					
						
							|  |  |  |          * If an optimizer has already produced an executor, | 
					
						
							|  |  |  |          * it might get confused by the executor disappearing, | 
					
						
							|  |  |  |          * but there is not much we can do about that here. */ | 
					
						
							|  |  |  |         Py_DECREF(executor); | 
					
						
							|  |  |  |         goto jump_to_destination; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-06-14 13:46:37 +01:00
										 |  |  |     insert_executor(code, src, index, executor); | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  |     assert(frame->prev_instr == src); | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     return executor->execute(executor, frame, stack_pointer); | 
					
						
							| 
									
										
										
										
											2023-06-19 10:32:20 +01:00
										 |  |  | jump_to_destination: | 
					
						
							|  |  |  |     frame->prev_instr = dest - 1; | 
					
						
							|  |  |  |     _PyFrame_SetStackPointer(frame, stack_pointer); | 
					
						
							|  |  |  |     return frame; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | /** Test support **/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     _PyOptimizerObject base; | 
					
						
							|  |  |  |     int64_t count; | 
					
						
							|  |  |  | } _PyCounterOptimizerObject; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | typedef struct { | 
					
						
							|  |  |  |     _PyExecutorObject executor; | 
					
						
							|  |  |  |     _PyCounterOptimizerObject *optimizer; | 
					
						
							|  |  |  |     _Py_CODEUNIT *next_instr; | 
					
						
							|  |  |  | } _PyCounterExecutorObject; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | counter_dealloc(_PyCounterExecutorObject *self) { | 
					
						
							|  |  |  |     Py_DECREF(self->optimizer); | 
					
						
							|  |  |  |     PyObject_Free(self); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyTypeObject CounterExecutor_Type = { | 
					
						
							|  |  |  |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
					
						
							|  |  |  |     .tp_name = "counting_executor", | 
					
						
							|  |  |  |     .tp_basicsize = sizeof(_PyCounterExecutorObject), | 
					
						
							|  |  |  |     .tp_itemsize = 0, | 
					
						
							|  |  |  |     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, | 
					
						
							|  |  |  |     .tp_dealloc = (destructor)counter_dealloc, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static _PyInterpreterFrame * | 
					
						
							|  |  |  | counter_execute(_PyExecutorObject *self, _PyInterpreterFrame *frame, PyObject **stack_pointer) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     ((_PyCounterExecutorObject *)self)->optimizer->count++; | 
					
						
							|  |  |  |     _PyFrame_SetStackPointer(frame, stack_pointer); | 
					
						
							|  |  |  |     frame->prev_instr = ((_PyCounterExecutorObject *)self)->next_instr - 1; | 
					
						
							|  |  |  |     Py_DECREF(self); | 
					
						
							|  |  |  |     return frame; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  | static int | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | counter_optimize( | 
					
						
							|  |  |  |     _PyOptimizerObject* self, | 
					
						
							|  |  |  |     PyCodeObject *code, | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |     _Py_CODEUNIT *instr, | 
					
						
							|  |  |  |     _PyExecutorObject **exec_ptr) | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | { | 
					
						
							|  |  |  |     _PyCounterExecutorObject *executor = (_PyCounterExecutorObject *)_PyObject_New(&CounterExecutor_Type); | 
					
						
							|  |  |  |     if (executor == NULL) { | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |         return -1; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     executor->executor.execute = counter_execute; | 
					
						
							|  |  |  |     Py_INCREF(self); | 
					
						
							|  |  |  |     executor->optimizer = (_PyCounterOptimizerObject *)self; | 
					
						
							|  |  |  |     executor->next_instr = instr; | 
					
						
							| 
									
										
										
										
											2023-06-05 09:44:23 +01:00
										 |  |  |     *exec_ptr = (_PyExecutorObject *)executor; | 
					
						
							|  |  |  |     return 1; | 
					
						
							| 
									
										
										
										
											2023-06-02 11:46:18 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | counter_get_counter(PyObject *self, PyObject *args) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     return PyLong_FromLongLong(((_PyCounterOptimizerObject *)self)->count); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyMethodDef counter_methods[] = { | 
					
						
							|  |  |  |     { "get_count", counter_get_counter, METH_NOARGS, NULL }, | 
					
						
							|  |  |  |     { NULL, NULL }, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyTypeObject CounterOptimizer_Type = { | 
					
						
							|  |  |  |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
					
						
							|  |  |  |     .tp_name = "Counter optimizer", | 
					
						
							|  |  |  |     .tp_basicsize = sizeof(_PyCounterOptimizerObject), | 
					
						
							|  |  |  |     .tp_itemsize = 0, | 
					
						
							|  |  |  |     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, | 
					
						
							|  |  |  |     .tp_methods = counter_methods, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PyObject * | 
					
						
							|  |  |  | PyUnstable_Optimizer_NewCounter(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     _PyCounterOptimizerObject *opt = (_PyCounterOptimizerObject *)_PyObject_New(&CounterOptimizer_Type); | 
					
						
							|  |  |  |     if (opt == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     opt->base.optimize = counter_optimize; | 
					
						
							|  |  |  |     opt->base.resume_threshold = UINT16_MAX; | 
					
						
							|  |  |  |     opt->base.backedge_threshold = 0; | 
					
						
							|  |  |  |     opt->count = 0; | 
					
						
							|  |  |  |     return (PyObject *)opt; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-06-26 19:02:57 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | ///////////////////// Experimental UOp Optimizer /////////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #ifdef Py_DEBUG
 | 
					
						
							|  |  |  |    /* For debugging the interpreter: */ | 
					
						
							|  |  |  | #  define LLTRACE  1      /* Low-level trace feature */
 | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static void | 
					
						
							|  |  |  | uop_dealloc(_PyUOpExecutorObject *self) { | 
					
						
							|  |  |  |     PyObject_Free(self); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyTypeObject UOpExecutor_Type = { | 
					
						
							|  |  |  |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
					
						
							|  |  |  |     .tp_name = "uop_executor", | 
					
						
							|  |  |  |     .tp_basicsize = sizeof(_PyUOpExecutorObject), | 
					
						
							|  |  |  |     .tp_itemsize = 0, | 
					
						
							|  |  |  |     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, | 
					
						
							|  |  |  |     .tp_dealloc = (destructor)uop_dealloc, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | translate_bytecode_to_trace( | 
					
						
							|  |  |  |     PyCodeObject *code, | 
					
						
							|  |  |  |     _Py_CODEUNIT *instr, | 
					
						
							|  |  |  |     _PyUOpInstruction *trace, | 
					
						
							|  |  |  |     int max_length) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  | #ifdef LLTRACE
 | 
					
						
							|  |  |  |     char *uop_debug = Py_GETENV("PYTHONUOPSDEBUG"); | 
					
						
							|  |  |  |     int lltrace = 0; | 
					
						
							|  |  |  |     if (uop_debug != NULL && *uop_debug >= '0') { | 
					
						
							|  |  |  |         lltrace = *uop_debug - '0';  // TODO: Parse an int and all that
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (lltrace >= 4) { | 
					
						
							|  |  |  |         fprintf(stderr, | 
					
						
							|  |  |  |                 "Optimizing %s (%s:%d) at offset %ld\n", | 
					
						
							|  |  |  |                 PyUnicode_AsUTF8(code->co_qualname), | 
					
						
							|  |  |  |                 PyUnicode_AsUTF8(code->co_filename), | 
					
						
							|  |  |  |                 code->co_firstlineno, | 
					
						
							|  |  |  |                 (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | #define ADD_TO_TRACE(OPCODE, OPERAND) \
 | 
					
						
							|  |  |  |         if (lltrace >= 2) { \ | 
					
						
							| 
									
										
										
										
											2023-06-28 11:28:07 -07:00
										 |  |  |             const char *opname = (OPCODE) < 256 ? _PyOpcode_OpName[(OPCODE)] : _PyOpcode_uop_name[(OPCODE)]; \ | 
					
						
							|  |  |  |             fprintf(stderr, "  ADD_TO_TRACE(%s, %" PRIu64 ")\n", opname, (uint64_t)(OPERAND)); \ | 
					
						
							| 
									
										
										
										
											2023-06-26 19:02:57 -07:00
										 |  |  |         } \ | 
					
						
							|  |  |  |         trace[trace_length].opcode = (OPCODE); \ | 
					
						
							|  |  |  |         trace[trace_length].operand = (OPERAND); \ | 
					
						
							|  |  |  |         trace_length++; | 
					
						
							|  |  |  | #else
 | 
					
						
							|  |  |  | #define ADD_TO_TRACE(OPCODE, OPERAND) \
 | 
					
						
							|  |  |  |         trace[trace_length].opcode = (OPCODE); \ | 
					
						
							|  |  |  |         trace[trace_length].operand = (OPERAND); \ | 
					
						
							|  |  |  |         trace_length++; | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int trace_length = 0; | 
					
						
							|  |  |  |     // Always reserve space for one uop, plus SET_UP, plus EXIT_TRACE
 | 
					
						
							|  |  |  |     while (trace_length + 3 <= max_length) { | 
					
						
							|  |  |  |         int opcode = instr->op.code; | 
					
						
							|  |  |  |         uint64_t operand = instr->op.arg; | 
					
						
							|  |  |  |         switch (opcode) { | 
					
						
							|  |  |  |             case LOAD_FAST_LOAD_FAST: | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 // Reserve space for two uops (+ SETUP + EXIT_TRACE)
 | 
					
						
							|  |  |  |                 if (trace_length + 4 > max_length) { | 
					
						
							|  |  |  |                     goto done; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 uint64_t oparg1 = operand >> 4; | 
					
						
							|  |  |  |                 uint64_t oparg2 = operand & 15; | 
					
						
							|  |  |  |                 ADD_TO_TRACE(LOAD_FAST, oparg1); | 
					
						
							|  |  |  |                 ADD_TO_TRACE(LOAD_FAST, oparg2); | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             default: | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode]; | 
					
						
							|  |  |  |                 if (expansion->nuops > 0) { | 
					
						
							|  |  |  |                     // Reserve space for nuops (+ SETUP + EXIT_TRACE)
 | 
					
						
							|  |  |  |                     int nuops = expansion->nuops; | 
					
						
							|  |  |  |                     if (trace_length + nuops + 2 > max_length) { | 
					
						
							|  |  |  |                         goto done; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     for (int i = 0; i < nuops; i++) { | 
					
						
							|  |  |  |                         int offset = expansion->uops[i].offset; | 
					
						
							|  |  |  |                         switch (expansion->uops[i].size) { | 
					
						
							|  |  |  |                             case 0: | 
					
						
							|  |  |  |                                 break; | 
					
						
							|  |  |  |                             case 1: | 
					
						
							|  |  |  |                                 operand = read_u16(&instr[offset].cache); | 
					
						
							|  |  |  |                                 break; | 
					
						
							|  |  |  |                             case 2: | 
					
						
							|  |  |  |                                 operand = read_u32(&instr[offset].cache); | 
					
						
							|  |  |  |                                 break; | 
					
						
							|  |  |  |                             case 4: | 
					
						
							|  |  |  |                                 operand = read_u64(&instr[offset].cache); | 
					
						
							|  |  |  |                                 break; | 
					
						
							|  |  |  |                             default: | 
					
						
							|  |  |  |                                 fprintf(stderr, | 
					
						
							|  |  |  |                                         "opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n", | 
					
						
							|  |  |  |                                         opcode, operand, nuops, i, | 
					
						
							|  |  |  |                                         expansion->uops[i].size, | 
					
						
							|  |  |  |                                         expansion->uops[i].offset); | 
					
						
							|  |  |  |                                 Py_FatalError("garbled expansion"); | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                         ADD_TO_TRACE(expansion->uops[i].uop, operand); | 
					
						
							|  |  |  |                         assert(expansion->uops[0].size == 0);  // TODO
 | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                     break; | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 // fprintf(stderr, "Unsupported opcode %d\n", opcode);
 | 
					
						
							|  |  |  |                 goto done;  // Break out of while loop
 | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         instr++; | 
					
						
							|  |  |  |         // Add cache size for opcode
 | 
					
						
							|  |  |  |         instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]]; | 
					
						
							|  |  |  |         ADD_TO_TRACE(SET_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive)); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | done: | 
					
						
							|  |  |  |     if (trace_length > 0) { | 
					
						
							|  |  |  |         ADD_TO_TRACE(EXIT_TRACE, 0); | 
					
						
							|  |  |  | #ifdef LLTRACE
 | 
					
						
							|  |  |  |         if (lltrace >= 1) { | 
					
						
							|  |  |  |             fprintf(stderr, | 
					
						
							|  |  |  |                     "Created a trace for %s (%s:%d) at offset %ld -- length %d\n", | 
					
						
							|  |  |  |                     PyUnicode_AsUTF8(code->co_qualname), | 
					
						
							|  |  |  |                     PyUnicode_AsUTF8(code->co_filename), | 
					
						
							|  |  |  |                     code->co_firstlineno, | 
					
						
							|  |  |  |                     (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive), | 
					
						
							|  |  |  |                     trace_length); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else { | 
					
						
							|  |  |  | #ifdef LLTRACE
 | 
					
						
							|  |  |  |         if (lltrace >= 4) { | 
					
						
							|  |  |  |             fprintf(stderr, | 
					
						
							|  |  |  |                     "No trace for %s (%s:%d) at offset %ld\n", | 
					
						
							|  |  |  |                     PyUnicode_AsUTF8(code->co_qualname), | 
					
						
							|  |  |  |                     PyUnicode_AsUTF8(code->co_filename), | 
					
						
							|  |  |  |                     code->co_firstlineno, | 
					
						
							|  |  |  |                     (long)(instr - (_Py_CODEUNIT *)code->co_code_adaptive)); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | #endif
 | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return trace_length; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #undef ADD_TO_TRACE
 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | uop_optimize( | 
					
						
							|  |  |  |     _PyOptimizerObject *self, | 
					
						
							|  |  |  |     PyCodeObject *code, | 
					
						
							|  |  |  |     _Py_CODEUNIT *instr, | 
					
						
							|  |  |  |     _PyExecutorObject **exec_ptr) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     _PyUOpInstruction trace[_Py_UOP_MAX_TRACE_LENGTH]; | 
					
						
							|  |  |  |     int trace_length = translate_bytecode_to_trace(code, instr, trace, _Py_UOP_MAX_TRACE_LENGTH); | 
					
						
							|  |  |  |     if (trace_length <= 0) { | 
					
						
							|  |  |  |         // Error or nothing translated
 | 
					
						
							|  |  |  |         return trace_length; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     OBJECT_STAT_INC(optimization_traces_created); | 
					
						
							|  |  |  |     _PyUOpExecutorObject *executor = (_PyUOpExecutorObject *)_PyObject_New(&UOpExecutor_Type); | 
					
						
							|  |  |  |     if (executor == NULL) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     executor->base.execute = _PyUopExecute; | 
					
						
							|  |  |  |     memcpy(executor->trace, trace, trace_length * sizeof(_PyUOpInstruction)); | 
					
						
							|  |  |  |     *exec_ptr = (_PyExecutorObject *)executor; | 
					
						
							|  |  |  |     return 1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyTypeObject UOpOptimizer_Type = { | 
					
						
							|  |  |  |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
					
						
							|  |  |  |     .tp_name = "uop_optimizer", | 
					
						
							|  |  |  |     .tp_basicsize = sizeof(_PyOptimizerObject), | 
					
						
							|  |  |  |     .tp_itemsize = 0, | 
					
						
							|  |  |  |     .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION, | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | PyObject * | 
					
						
							|  |  |  | PyUnstable_Optimizer_NewUOpOptimizer(void) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     _PyOptimizerObject *opt = (_PyOptimizerObject *)_PyObject_New(&UOpOptimizer_Type); | 
					
						
							|  |  |  |     if (opt == NULL) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     opt->optimize = uop_optimize; | 
					
						
							|  |  |  |     opt->resume_threshold = UINT16_MAX; | 
					
						
							| 
									
										
										
										
											2023-06-28 11:28:07 -07:00
										 |  |  |     // Need at least 3 iterations to settle specializations.
 | 
					
						
							|  |  |  |     // A few lower bits of the counter are reserved for other flags.
 | 
					
						
							|  |  |  |     opt->backedge_threshold = 3 << OPTIMIZER_BITS_IN_COUNTER; | 
					
						
							| 
									
										
										
										
											2023-06-26 19:02:57 -07:00
										 |  |  |     return (PyObject *)opt; | 
					
						
							|  |  |  | } |