mirror of
https://github.com/python/cpython.git
synced 2026-04-13 23:31:02 +00:00
gh-144888: Replace bloom filter linked lists with continuous arrays to optimize executor invalidating performance (GH-145873)
This commit is contained in:
parent
e18abc6a1f
commit
81ef1b7317
9 changed files with 102 additions and 70 deletions
19
Python/jit.c
19
Python/jit.c
|
|
@ -62,6 +62,23 @@ jit_error(const char *message)
|
|||
|
||||
static size_t _Py_jit_shim_size = 0;
|
||||
|
||||
static int
|
||||
address_in_executor_array(_PyExecutorObject **ptrs, size_t count, uintptr_t addr)
|
||||
{
|
||||
for (size_t i = 0; i < count; i++) {
|
||||
_PyExecutorObject *exec = ptrs[i];
|
||||
if (exec->jit_code == NULL || exec->jit_size == 0) {
|
||||
continue;
|
||||
}
|
||||
uintptr_t start = (uintptr_t)exec->jit_code;
|
||||
uintptr_t end = start + exec->jit_size;
|
||||
if (addr >= start && addr < end) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
address_in_executor_list(_PyExecutorObject *head, uintptr_t addr)
|
||||
{
|
||||
|
|
@ -94,7 +111,7 @@ _PyJIT_AddressInJitCode(PyInterpreterState *interp, uintptr_t addr)
|
|||
return 1;
|
||||
}
|
||||
}
|
||||
if (address_in_executor_list(interp->executor_list_head, addr)) {
|
||||
if (address_in_executor_array(interp->executor_ptrs, interp->executor_count, addr)) {
|
||||
return 1;
|
||||
}
|
||||
if (address_in_executor_list(interp->executor_deletion_list_head, addr)) {
|
||||
|
|
|
|||
|
|
@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate, _PyUOpInstruction *buffer, i
|
|||
// linking of executor. Otherwise, the GC tries to untrack a
|
||||
// still untracked object during dealloc.
|
||||
_PyObject_GC_TRACK(executor);
|
||||
_Py_ExecutorInit(executor, dependencies);
|
||||
if (_Py_ExecutorInit(executor, dependencies) < 0) {
|
||||
Py_DECREF(executor);
|
||||
return NULL;
|
||||
}
|
||||
#ifdef Py_DEBUG
|
||||
char *python_lltrace = Py_GETENV("PYTHON_LLTRACE");
|
||||
int lltrace = 0;
|
||||
|
|
@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
|
|||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
link_executor(_PyExecutorObject *executor)
|
||||
static int
|
||||
link_executor(_PyExecutorObject *executor, const _PyBloomFilter *bloom)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyExecutorLinkListNode *links = &executor->vm_data.links;
|
||||
_PyExecutorObject *head = interp->executor_list_head;
|
||||
if (head == NULL) {
|
||||
interp->executor_list_head = executor;
|
||||
links->previous = NULL;
|
||||
links->next = NULL;
|
||||
if (interp->executor_count == interp->executor_capacity) {
|
||||
size_t new_cap = interp->executor_capacity ? interp->executor_capacity * 2 : 64;
|
||||
_PyBloomFilter *new_blooms = PyMem_Realloc(
|
||||
interp->executor_blooms, new_cap * sizeof(_PyBloomFilter));
|
||||
if (new_blooms == NULL) {
|
||||
return -1;
|
||||
}
|
||||
_PyExecutorObject **new_ptrs = PyMem_Realloc(
|
||||
interp->executor_ptrs, new_cap * sizeof(_PyExecutorObject *));
|
||||
if (new_ptrs == NULL) {
|
||||
/* Revert blooms realloc — the old pointer may have been freed by
|
||||
* a successful realloc, but new_blooms is the valid pointer. */
|
||||
interp->executor_blooms = new_blooms;
|
||||
return -1;
|
||||
}
|
||||
interp->executor_blooms = new_blooms;
|
||||
interp->executor_ptrs = new_ptrs;
|
||||
interp->executor_capacity = new_cap;
|
||||
}
|
||||
else {
|
||||
assert(head->vm_data.links.previous == NULL);
|
||||
links->previous = NULL;
|
||||
links->next = head;
|
||||
head->vm_data.links.previous = executor;
|
||||
interp->executor_list_head = executor;
|
||||
}
|
||||
/* executor_list_head must be first in list */
|
||||
assert(interp->executor_list_head->vm_data.links.previous == NULL);
|
||||
size_t idx = interp->executor_count++;
|
||||
interp->executor_blooms[idx] = *bloom;
|
||||
interp->executor_ptrs[idx] = executor;
|
||||
executor->vm_data.bloom_array_idx = (int32_t)idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
unlink_executor(_PyExecutorObject *executor)
|
||||
{
|
||||
_PyExecutorLinkListNode *links = &executor->vm_data.links;
|
||||
_PyExecutorObject *next = links->next;
|
||||
_PyExecutorObject *prev = links->previous;
|
||||
if (next != NULL) {
|
||||
next->vm_data.links.previous = prev;
|
||||
}
|
||||
if (prev != NULL) {
|
||||
prev->vm_data.links.next = next;
|
||||
}
|
||||
else {
|
||||
// prev == NULL implies that executor is the list head
|
||||
PyInterpreterState *interp = PyInterpreterState_Get();
|
||||
assert(interp->executor_list_head == executor);
|
||||
interp->executor_list_head = next;
|
||||
PyInterpreterState *interp = PyInterpreterState_Get();
|
||||
int32_t idx = executor->vm_data.bloom_array_idx;
|
||||
assert(idx >= 0 && (size_t)idx < interp->executor_count);
|
||||
size_t last = --interp->executor_count;
|
||||
if ((size_t)idx != last) {
|
||||
/* Swap-remove: move the last element into the vacated slot */
|
||||
interp->executor_blooms[idx] = interp->executor_blooms[last];
|
||||
interp->executor_ptrs[idx] = interp->executor_ptrs[last];
|
||||
interp->executor_ptrs[idx]->vm_data.bloom_array_idx = idx;
|
||||
}
|
||||
executor->vm_data.bloom_array_idx = -1;
|
||||
}
|
||||
|
||||
/* This must be called by optimizers before using the executor */
|
||||
void
|
||||
int
|
||||
_Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_set)
|
||||
{
|
||||
executor->vm_data.valid = true;
|
||||
executor->vm_data.pending_deletion = 0;
|
||||
executor->vm_data.code = NULL;
|
||||
for (int i = 0; i < _Py_BLOOM_FILTER_WORDS; i++) {
|
||||
executor->vm_data.bloom.bits[i] = dependency_set->bits[i];
|
||||
if (link_executor(executor, dependency_set) < 0) {
|
||||
return -1;
|
||||
}
|
||||
link_executor(executor);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static _PyExecutorObject *
|
||||
|
|
@ -1809,11 +1816,15 @@ void
|
|||
_Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj)
|
||||
{
|
||||
assert(executor->vm_data.valid);
|
||||
_Py_BloomFilter_Add(&executor->vm_data.bloom, obj);
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
int32_t idx = executor->vm_data.bloom_array_idx;
|
||||
assert(idx >= 0 && (size_t)idx < interp->executor_count);
|
||||
_Py_BloomFilter_Add(&interp->executor_blooms[idx], obj);
|
||||
}
|
||||
|
||||
/* Invalidate all executors that depend on `obj`
|
||||
* May cause other executors to be invalidated as well
|
||||
* May cause other executors to be invalidated as well.
|
||||
* Uses contiguous bloom filter array for cache-friendly scanning.
|
||||
*/
|
||||
void
|
||||
_Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation)
|
||||
|
|
@ -1821,23 +1832,20 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
|
|||
_PyBloomFilter obj_filter;
|
||||
_Py_BloomFilter_Init(&obj_filter);
|
||||
_Py_BloomFilter_Add(&obj_filter, obj);
|
||||
/* Walk the list of executors */
|
||||
/* TO DO -- Use a tree to avoid traversing as many objects */
|
||||
/* Scan contiguous bloom filter array */
|
||||
PyObject *invalidate = PyList_New(0);
|
||||
if (invalidate == NULL) {
|
||||
goto error;
|
||||
}
|
||||
/* Clearing an executor can clear others, so we need to make a list of
|
||||
* executors to invalidate first */
|
||||
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
|
||||
assert(exec->vm_data.valid);
|
||||
_PyExecutorObject *next = exec->vm_data.links.next;
|
||||
if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter) &&
|
||||
PyList_Append(invalidate, (PyObject *)exec))
|
||||
for (size_t i = 0; i < interp->executor_count; i++) {
|
||||
assert(interp->executor_ptrs[i]->vm_data.valid);
|
||||
if (bloom_filter_may_contain(&interp->executor_blooms[i], &obj_filter) &&
|
||||
PyList_Append(invalidate, (PyObject *)interp->executor_ptrs[i]))
|
||||
{
|
||||
goto error;
|
||||
}
|
||||
exec = next;
|
||||
}
|
||||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
|
||||
PyObject *exec = PyList_GET_ITEM(invalidate, i);
|
||||
|
|
@ -1859,8 +1867,9 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
|
|||
void
|
||||
_Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
|
||||
{
|
||||
while (interp->executor_list_head) {
|
||||
_PyExecutorObject *executor = interp->executor_list_head;
|
||||
while (interp->executor_count > 0) {
|
||||
/* Invalidate from the end to avoid repeated swap-remove shifts */
|
||||
_PyExecutorObject *executor = interp->executor_ptrs[interp->executor_count - 1];
|
||||
assert(executor->vm_data.valid);
|
||||
if (executor->vm_data.code) {
|
||||
// Clear the entire code object so its co_executors array be freed:
|
||||
|
|
@ -1878,8 +1887,7 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
|
|||
void
|
||||
_Py_Executors_InvalidateCold(PyInterpreterState *interp)
|
||||
{
|
||||
/* Walk the list of executors */
|
||||
/* TO DO -- Use a tree to avoid traversing as many objects */
|
||||
/* Scan contiguous executor array */
|
||||
PyObject *invalidate = PyList_New(0);
|
||||
if (invalidate == NULL) {
|
||||
goto error;
|
||||
|
|
@ -1887,9 +1895,9 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
|
|||
|
||||
/* Clearing an executor can deallocate others, so we need to make a list of
|
||||
* executors to invalidate first */
|
||||
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
|
||||
for (size_t i = 0; i < interp->executor_count; i++) {
|
||||
_PyExecutorObject *exec = interp->executor_ptrs[i];
|
||||
assert(exec->vm_data.valid);
|
||||
_PyExecutorObject *next = exec->vm_data.links.next;
|
||||
|
||||
if (exec->vm_data.cold && PyList_Append(invalidate, (PyObject *)exec) < 0) {
|
||||
goto error;
|
||||
|
|
@ -1897,8 +1905,6 @@ _Py_Executors_InvalidateCold(PyInterpreterState *interp)
|
|||
else {
|
||||
exec->vm_data.cold = true;
|
||||
}
|
||||
|
||||
exec = next;
|
||||
}
|
||||
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
|
||||
PyObject *exec = PyList_GET_ITEM(invalidate, i);
|
||||
|
|
@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
|
|||
fprintf(out, " rankdir = \"LR\"\n\n");
|
||||
fprintf(out, " node [colorscheme=greys9]\n");
|
||||
PyInterpreterState *interp = PyInterpreterState_Get();
|
||||
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
|
||||
executor_to_gv(exec, out);
|
||||
exec = exec->vm_data.links.next;
|
||||
for (size_t i = 0; i < interp->executor_count; i++) {
|
||||
executor_to_gv(interp->executor_ptrs[i], out);
|
||||
}
|
||||
fprintf(out, "}\n\n");
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -1761,6 +1761,12 @@ finalize_modules(PyThreadState *tstate)
|
|||
interp->compiling = false;
|
||||
#ifdef _Py_TIER2
|
||||
_Py_Executors_InvalidateAll(interp, 0);
|
||||
PyMem_Free(interp->executor_blooms);
|
||||
PyMem_Free(interp->executor_ptrs);
|
||||
interp->executor_blooms = NULL;
|
||||
interp->executor_ptrs = NULL;
|
||||
interp->executor_count = 0;
|
||||
interp->executor_capacity = 0;
|
||||
#endif
|
||||
|
||||
// Stop watching __builtin__ modifications
|
||||
|
|
|
|||
|
|
@ -597,7 +597,10 @@ init_interpreter(PyInterpreterState *interp,
|
|||
interp->_code_object_generation = 0;
|
||||
interp->jit = false;
|
||||
interp->compiling = false;
|
||||
interp->executor_list_head = NULL;
|
||||
interp->executor_blooms = NULL;
|
||||
interp->executor_ptrs = NULL;
|
||||
interp->executor_count = 0;
|
||||
interp->executor_capacity = 0;
|
||||
interp->executor_deletion_list_head = NULL;
|
||||
interp->executor_creation_counter = JIT_CLEANUP_THRESHOLD;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue