gh-137838: Move _PyUOpInstruction buffer to PyInterpreterState (gh-138918)

This commit is contained in:
Donghee Na 2025-09-17 18:50:16 +01:00 committed by GitHub
parent a9b6b09141
commit d873fb42f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 87 additions and 31 deletions

View file

@ -14,6 +14,7 @@ extern "C" {
#include "pycore_structs.h" // PyHamtObject #include "pycore_structs.h" // PyHamtObject
#include "pycore_tstate.h" // _PyThreadStateImpl #include "pycore_tstate.h" // _PyThreadStateImpl
#include "pycore_typedefs.h" // _PyRuntimeState #include "pycore_typedefs.h" // _PyRuntimeState
#include "pycore_uop.h" // struct _PyUOpInstruction
#define CODE_MAX_WATCHERS 8 #define CODE_MAX_WATCHERS 8
@ -949,6 +950,8 @@ struct _is {
struct callable_cache callable_cache; struct callable_cache callable_cache;
PyObject *common_consts[NUM_COMMON_CONSTANTS]; PyObject *common_consts[NUM_COMMON_CONSTANTS];
bool jit; bool jit;
bool compiling;
struct _PyUOpInstruction *jit_uop_buffer;
struct _PyExecutorObject *executor_list_head; struct _PyExecutorObject *executor_list_head;
struct _PyExecutorObject *executor_deletion_list_head; struct _PyExecutorObject *executor_deletion_list_head;
struct _PyExecutorObject *cold_executor; struct _PyExecutorObject *cold_executor;

View file

@ -9,6 +9,7 @@ extern "C" {
#endif #endif
#include "pycore_typedefs.h" // _PyInterpreterFrame #include "pycore_typedefs.h" // _PyInterpreterFrame
#include "pycore_uop.h" // _PyUOpInstruction
#include "pycore_uop_ids.h" #include "pycore_uop_ids.h"
#include "pycore_stackref.h" // _PyStackRef #include "pycore_stackref.h" // _PyStackRef
#include <stdbool.h> #include <stdbool.h>
@ -41,32 +42,6 @@ typedef struct {
PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR). PyCodeObject *code; // Weak (NULL if no corresponding ENTER_EXECUTOR).
} _PyVMData; } _PyVMData;
/* One instruction of a uop trace.
*
* Depending on the format,
* the 32 bits between the oparg and operand are:
* UOP_FORMAT_TARGET:
* uint32_t target;
* UOP_FORMAT_JUMP
* uint16_t jump_target;
* uint16_t error_target;
*/
typedef struct {
// opcode and format are bit-fields declared on the same uint16_t base type.
uint16_t opcode:15;
// 1-bit selector for how the union below is read (see the comment above).
uint16_t format:1;
uint16_t oparg;
// Anonymous union/struct: the members are accessed directly on the
// instruction (inst->target, inst->jump_target, inst->error_target).
union {
uint32_t target;
struct {
uint16_t jump_target;
uint16_t error_target;
};
};
uint64_t operand0; // A cache entry
uint64_t operand1;
// This field exists only in Py_STATS builds, so the struct size differs
// between stats and non-stats builds.
#ifdef Py_STATS
uint64_t execution_count;
#endif
} _PyUOpInstruction;
typedef struct _PyExitData { typedef struct _PyExitData {
uint32_t target; uint32_t target;
uint16_t index; uint16_t index;
@ -118,9 +93,6 @@ PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
// trace_run_counter is greater than this value. // trace_run_counter is greater than this value.
#define JIT_CLEANUP_THRESHOLD 100000 #define JIT_CLEANUP_THRESHOLD 100000
// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 1200
#define TRACE_STACK_SIZE 5 #define TRACE_STACK_SIZE 5
int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame, int _Py_uop_analyze_and_optimize(_PyInterpreterFrame *frame,

View file

@ -0,0 +1,45 @@
#ifndef Py_CORE_UOP_H
#define Py_CORE_UOP_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
#include <stdint.h>
/* One instruction of a uop trace.
*
* Depending on the format,
* the 32 bits between the oparg and operand are:
* UOP_FORMAT_TARGET:
* uint32_t target;
* UOP_FORMAT_JUMP
* uint16_t jump_target;
* uint16_t error_target;
*/
typedef struct _PyUOpInstruction{
// opcode and format are bit-fields declared on the same uint16_t base type.
uint16_t opcode:15;
// 1-bit selector for how the union below is read (see the comment above).
uint16_t format:1;
uint16_t oparg;
// Anonymous union/struct: the members are accessed directly on the
// instruction (inst->target, inst->jump_target, inst->error_target).
union {
uint32_t target;
struct {
uint16_t jump_target;
uint16_t error_target;
};
};
uint64_t operand0; // A cache entry
uint64_t operand1;
// This field exists only in Py_STATS builds, so the struct size (and
// therefore UOP_BUFFER_SIZE) differs between stats and non-stats builds.
#ifdef Py_STATS
uint64_t execution_count;
#endif
} _PyUOpInstruction;
// This is the length of the trace we project initially.
#define UOP_MAX_TRACE_LENGTH 1200
// Size, in bytes, of an array of UOP_MAX_TRACE_LENGTH _PyUOpInstruction
// entries.
#define UOP_BUFFER_SIZE (UOP_MAX_TRACE_LENGTH * sizeof(_PyUOpInstruction))
#ifdef __cplusplus
}
#endif
#endif /* !Py_CORE_UOP_H */

View file

@ -1435,6 +1435,7 @@ PYTHON_HEADERS= \
$(srcdir)/Include/internal/pycore_unicodeobject_generated.h \ $(srcdir)/Include/internal/pycore_unicodeobject_generated.h \
$(srcdir)/Include/internal/pycore_unionobject.h \ $(srcdir)/Include/internal/pycore_unionobject.h \
$(srcdir)/Include/internal/pycore_uniqueid.h \ $(srcdir)/Include/internal/pycore_uniqueid.h \
$(srcdir)/Include/internal/pycore_uop.h \
$(srcdir)/Include/internal/pycore_uop_ids.h \ $(srcdir)/Include/internal/pycore_uop_ids.h \
$(srcdir)/Include/internal/pycore_uop_metadata.h \ $(srcdir)/Include/internal/pycore_uop_metadata.h \
$(srcdir)/Include/internal/pycore_warnings.h \ $(srcdir)/Include/internal/pycore_warnings.h \

View file

@ -882,6 +882,15 @@
<ClInclude Include="..\Include\internal\pycore_uniqueid.h"> <ClInclude Include="..\Include\internal\pycore_uniqueid.h">
<Filter>Include\internal</Filter> <Filter>Include\internal</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop_ids.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\pycore_uop_metadata.h">
<Filter>Include\internal</Filter>
</ClInclude>
<ClInclude Include="..\Include\internal\mimalloc\mimalloc.h"> <ClInclude Include="..\Include\internal\mimalloc\mimalloc.h">
<Filter>Include\internal\mimalloc</Filter> <Filter>Include\internal\mimalloc</Filter>
</ClInclude> </ClInclude>

View file

@ -116,7 +116,10 @@ _PyOptimizer_Optimize(
_PyExecutorObject **executor_ptr, int chain_depth) _PyExecutorObject **executor_ptr, int chain_depth)
{ {
_PyStackRef *stack_pointer = frame->stackpointer; _PyStackRef *stack_pointer = frame->stackpointer;
assert(_PyInterpreterState_GET()->jit); PyInterpreterState *interp = _PyInterpreterState_GET();
assert(interp->jit);
assert(!interp->compiling);
interp->compiling = true;
// The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must* // The first executor in a chain and the MAX_CHAIN_DEPTH'th executor *must*
// make progress in order to avoid infinite loops or excessively-long // make progress in order to avoid infinite loops or excessively-long
// side-exit chains. We can only insert the executor into the bytecode if // side-exit chains. We can only insert the executor into the bytecode if
@ -126,10 +129,12 @@ _PyOptimizer_Optimize(
PyCodeObject *code = _PyFrame_GetCode(frame); PyCodeObject *code = _PyFrame_GetCode(frame);
assert(PyCode_Check(code)); assert(PyCode_Check(code));
if (progress_needed && !has_space_for_executor(code, start)) { if (progress_needed && !has_space_for_executor(code, start)) {
interp->compiling = false;
return 0; return 0;
} }
int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed); int err = uop_optimize(frame, start, executor_ptr, (int)(stack_pointer - _PyFrame_Stackbase(frame)), progress_needed);
if (err <= 0) { if (err <= 0) {
interp->compiling = false;
return err; return err;
} }
assert(*executor_ptr != NULL); assert(*executor_ptr != NULL);
@ -143,6 +148,7 @@ _PyOptimizer_Optimize(
* it might get confused by the executor disappearing, * it might get confused by the executor disappearing,
* but there is not much we can do about that here. */ * but there is not much we can do about that here. */
Py_DECREF(*executor_ptr); Py_DECREF(*executor_ptr);
interp->compiling = false;
return 0; return 0;
} }
insert_executor(code, start, index, *executor_ptr); insert_executor(code, start, index, *executor_ptr);
@ -152,6 +158,7 @@ _PyOptimizer_Optimize(
} }
(*executor_ptr)->vm_data.chain_depth = chain_depth; (*executor_ptr)->vm_data.chain_depth = chain_depth;
assert((*executor_ptr)->vm_data.valid); assert((*executor_ptr)->vm_data.valid);
interp->compiling = false;
return 1; return 1;
} }
@ -1280,7 +1287,14 @@ uop_optimize(
{ {
_PyBloomFilter dependencies; _PyBloomFilter dependencies;
_Py_BloomFilter_Init(&dependencies); _Py_BloomFilter_Init(&dependencies);
_PyUOpInstruction buffer[UOP_MAX_TRACE_LENGTH]; PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->jit_uop_buffer == NULL) {
interp->jit_uop_buffer = (_PyUOpInstruction *)_PyObject_VirtualAlloc(UOP_BUFFER_SIZE);
if (interp->jit_uop_buffer == NULL) {
return 0;
}
}
_PyUOpInstruction *buffer = interp->jit_uop_buffer;
OPT_STAT_INC(attempts); OPT_STAT_INC(attempts);
char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE");
bool is_noopt = true; bool is_noopt = true;

View file

@ -1702,6 +1702,7 @@ finalize_modules(PyThreadState *tstate)
// Invalidate all executors and turn off JIT: // Invalidate all executors and turn off JIT:
interp->jit = false; interp->jit = false;
interp->compiling = false;
#ifdef _Py_TIER2 #ifdef _Py_TIER2
_Py_Executors_InvalidateAll(interp, 0); _Py_Executors_InvalidateAll(interp, 0);
#endif #endif

View file

@ -22,6 +22,7 @@
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_runtime_init.h" // _PyRuntimeState_INIT
#include "pycore_stackref.h" // Py_STACKREF_DEBUG #include "pycore_stackref.h" // Py_STACKREF_DEBUG
#include "pycore_time.h" // _PyTime_Init() #include "pycore_time.h" // _PyTime_Init()
#include "pycore_uop.h" // UOP_BUFFER_SIZE
#include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts() #include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts()
@ -550,6 +551,11 @@ init_interpreter(PyInterpreterState *interp,
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
_Py_brc_init_state(interp); _Py_brc_init_state(interp);
#endif #endif
#ifdef _Py_TIER2
// No uop buffer yet: uop_optimize() allocates it on first use.
interp->jit_uop_buffer = NULL;
#endif
llist_init(&interp->mem_free_queue.head); llist_init(&interp->mem_free_queue.head);
llist_init(&interp->asyncio_tasks_head); llist_init(&interp->asyncio_tasks_head);
interp->asyncio_tasks_lock = (PyMutex){0}; interp->asyncio_tasks_lock = (PyMutex){0};
@ -565,6 +571,7 @@ init_interpreter(PyInterpreterState *interp,
} }
interp->_code_object_generation = 0; interp->_code_object_generation = 0;
interp->jit = false; interp->jit = false;
interp->compiling = false;
interp->executor_list_head = NULL; interp->executor_list_head = NULL;
interp->executor_deletion_list_head = NULL; interp->executor_deletion_list_head = NULL;
interp->executor_deletion_list_remaining_capacity = 0; interp->executor_deletion_list_remaining_capacity = 0;
@ -797,6 +804,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
#ifdef _Py_TIER2 #ifdef _Py_TIER2
_Py_ClearExecutorDeletionList(interp); _Py_ClearExecutorDeletionList(interp);
if (interp->jit_uop_buffer != NULL) {
_PyObject_VirtualFree(interp->jit_uop_buffer, UOP_BUFFER_SIZE);
interp->jit_uop_buffer = NULL;
}
#endif #endif
_PyAST_Fini(interp); _PyAST_Fini(interp);
_PyAtExit_Fini(interp); _PyAtExit_Fini(interp);