mirror of
https://github.com/python/cpython.git
synced 2026-06-28 03:41:13 +00:00
gh-151613: Fix remote debugging frame cache ABA (#151614)
The remote debugging frame cache previously used only the last_profiled_frame address as its cache anchor. If a frame returned and a later frame reused the same _PyInterpreterFrame address, the profiler could accept a stale cache entry and splice parent frames from a different call chain into the current stack.
This adds a last_profiled_frame_seq counter next to last_profiled_frame, increments it when the anchor advances, stores it in frame cache entries, and validates cache hits against both the frame address and the sequence. Cache miss walks now copy stack chunks before storing new cache entries so stored continuations come from a stable snapshot. The new regression test exercises alternating call chains and checks that cached stacks never contain frames from both branches.
(cherry picked from commit 8cda6ae2f1)
672 lines
24 KiB
C
672 lines
24 KiB
C
/******************************************************************************
|
|
* Remote Debugging Module - Frame Functions
|
|
*
|
|
* This file contains functions for parsing interpreter frames and
|
|
* managing stack chunks from remote process memory.
|
|
******************************************************************************/
|
|
|
|
#include "_remote_debugging.h"
|
|
|
|
/* ============================================================================
|
|
* STACK CHUNK MANAGEMENT FUNCTIONS
|
|
* ============================================================================ */
|
|
|
|
void
|
|
cleanup_stack_chunks(StackChunkList *chunks)
|
|
{
|
|
for (size_t i = 0; i < chunks->count; ++i) {
|
|
PyMem_RawFree(chunks->chunks[i].local_copy);
|
|
}
|
|
PyMem_RawFree(chunks->chunks);
|
|
}
|
|
|
|
static int
|
|
process_single_stack_chunk(
|
|
RemoteUnwinderObject *unwinder,
|
|
uintptr_t chunk_addr,
|
|
StackChunkInfo *chunk_info
|
|
) {
|
|
// Start with default size assumption
|
|
size_t current_size = _PY_DATA_STACK_CHUNK_SIZE;
|
|
|
|
char *this_chunk = PyMem_RawMalloc(current_size);
|
|
if (!this_chunk) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunk buffer");
|
|
return -1;
|
|
}
|
|
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, current_size, this_chunk) < 0) {
|
|
PyMem_RawFree(this_chunk);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read stack chunk");
|
|
return -1;
|
|
}
|
|
|
|
// Check actual size and reread if necessary
|
|
size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size));
|
|
if (actual_size != current_size) {
|
|
// Validate size: reject garbage (too small or unreasonably large)
|
|
// Size must be at least enough for the header and reasonably bounded
|
|
if (actual_size <= offsetof(_PyStackChunk, data) || actual_size > MAX_STACK_CHUNK_SIZE) {
|
|
PyMem_RawFree(this_chunk);
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Invalid stack chunk size %zu (corrupted remote memory)", actual_size);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError,
|
|
"Invalid stack chunk size (corrupted remote memory)");
|
|
return -1;
|
|
}
|
|
|
|
char *tmp = PyMem_RawRealloc(this_chunk, actual_size);
|
|
if (!tmp) {
|
|
PyMem_RawFree(this_chunk);
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to reallocate stack chunk buffer");
|
|
return -1;
|
|
}
|
|
this_chunk = tmp;
|
|
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, chunk_addr, actual_size, this_chunk) < 0) {
|
|
PyMem_RawFree(this_chunk);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to reread stack chunk with correct size");
|
|
return -1;
|
|
}
|
|
current_size = actual_size;
|
|
}
|
|
|
|
chunk_info->remote_addr = chunk_addr;
|
|
chunk_info->size = current_size;
|
|
chunk_info->local_copy = this_chunk;
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
copy_stack_chunks(RemoteUnwinderObject *unwinder,
|
|
uintptr_t tstate_addr,
|
|
StackChunkList *out_chunks)
|
|
{
|
|
uintptr_t chunk_addr;
|
|
StackChunkInfo *chunks = NULL;
|
|
size_t count = 0;
|
|
size_t max_chunks = 16;
|
|
|
|
if (read_ptr(unwinder, tstate_addr + (uintptr_t)unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read initial stack chunk address");
|
|
return -1;
|
|
}
|
|
|
|
chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo));
|
|
if (!chunks) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate stack chunks array");
|
|
return -1;
|
|
}
|
|
|
|
const size_t MAX_STACK_CHUNKS = 4096;
|
|
while (chunk_addr != 0 && count < MAX_STACK_CHUNKS) {
|
|
// Grow array if needed
|
|
if (count >= max_chunks) {
|
|
max_chunks *= 2;
|
|
StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo));
|
|
if (!new_chunks) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to grow stack chunks array");
|
|
goto error;
|
|
}
|
|
chunks = new_chunks;
|
|
}
|
|
|
|
// Process this chunk
|
|
if (process_single_stack_chunk(unwinder, chunk_addr, &chunks[count]) < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process stack chunk");
|
|
goto error;
|
|
}
|
|
|
|
// Get next chunk address and increment count
|
|
chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous));
|
|
count++;
|
|
}
|
|
|
|
out_chunks->chunks = chunks;
|
|
out_chunks->count = count;
|
|
return 0;
|
|
|
|
error:
|
|
for (size_t i = 0; i < count; ++i) {
|
|
PyMem_RawFree(chunks[i].local_copy);
|
|
}
|
|
PyMem_RawFree(chunks);
|
|
return -1;
|
|
}
|
|
|
|
void *
|
|
find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr)
|
|
{
|
|
for (size_t i = 0; i < chunks->count; ++i) {
|
|
// Validate size: reject garbage that would cause underflow
|
|
if (chunks->chunks[i].size <= offsetof(_PyStackChunk, data)) {
|
|
// Skip this chunk - corrupted size from remote memory
|
|
continue;
|
|
}
|
|
uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data);
|
|
size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data);
|
|
|
|
if (payload >= SIZEOF_INTERP_FRAME &&
|
|
remote_ptr >= base &&
|
|
remote_ptr <= base + payload - SIZEOF_INTERP_FRAME) {
|
|
return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr);
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* ============================================================================
|
|
* FRAME PARSING FUNCTIONS
|
|
* ============================================================================ */
|
|
|
|
int
|
|
is_frame_valid(
|
|
RemoteUnwinderObject *unwinder,
|
|
uintptr_t frame_addr,
|
|
uintptr_t code_object_addr
|
|
) {
|
|
if ((void*)code_object_addr == NULL) {
|
|
return 0;
|
|
}
|
|
|
|
void* frame = (void*)frame_addr;
|
|
|
|
char owner = GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner);
|
|
if (owner == FRAME_OWNED_BY_INTERPRETER) {
|
|
return 0; // C frame or sentinel base frame
|
|
}
|
|
|
|
if (owner != FRAME_OWNED_BY_GENERATOR && owner != FRAME_OWNED_BY_THREAD) {
|
|
PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Unhandled frame owner type in async frame");
|
|
return -1;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
parse_frame_buffer(
|
|
RemoteUnwinderObject *unwinder,
|
|
PyObject** result,
|
|
const char *frame,
|
|
uintptr_t* address_of_code_object,
|
|
uintptr_t* previous_frame
|
|
) {
|
|
*address_of_code_object = 0;
|
|
|
|
*previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
|
|
uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable);
|
|
int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
|
|
if (frame_valid != 1) {
|
|
return frame_valid;
|
|
}
|
|
|
|
uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
|
|
|
|
// Get tlbc_index for free threading builds
|
|
int32_t tlbc_index = 0;
|
|
#ifdef Py_GIL_DISABLED
|
|
if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
|
|
tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
|
|
}
|
|
#endif
|
|
|
|
*address_of_code_object = code_object;
|
|
|
|
CodeObjectContext code_ctx = {
|
|
.code_addr = code_object,
|
|
.instruction_pointer = instruction_pointer,
|
|
.tlbc_index = tlbc_index,
|
|
};
|
|
return parse_code_object(unwinder, result, &code_ctx);
|
|
}
|
|
|
|
int
|
|
parse_frame_object(
|
|
RemoteUnwinderObject *unwinder,
|
|
PyObject** result,
|
|
uintptr_t address,
|
|
uintptr_t* address_of_code_object,
|
|
uintptr_t* previous_frame
|
|
) {
|
|
char frame[SIZEOF_INTERP_FRAME];
|
|
Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address,
|
|
SIZEOF_INTERP_FRAME,
|
|
frame
|
|
);
|
|
if (bytes_read < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter frame");
|
|
return -1;
|
|
}
|
|
STATS_INC(unwinder, memory_reads);
|
|
STATS_ADD(unwinder, memory_bytes_read, SIZEOF_INTERP_FRAME);
|
|
|
|
return parse_frame_buffer(unwinder, result, frame, address_of_code_object, previous_frame);
|
|
}
|
|
|
|
int
|
|
parse_frame_from_chunks(
|
|
RemoteUnwinderObject *unwinder,
|
|
PyObject **result,
|
|
uintptr_t address,
|
|
uintptr_t *previous_frame,
|
|
uintptr_t *stackpointer,
|
|
StackChunkList *chunks
|
|
) {
|
|
void *frame_ptr = find_frame_in_chunks(chunks, address);
|
|
if (!frame_ptr) {
|
|
PyErr_Format(PyExc_RuntimeError, "Frame at address 0x%lx not found in stack chunks", address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame not found in stack chunks");
|
|
return -1;
|
|
}
|
|
|
|
char *frame = (char *)frame_ptr;
|
|
*previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous);
|
|
*stackpointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.stackpointer);
|
|
uintptr_t code_object = GET_MEMBER_NO_TAG(uintptr_t, frame_ptr, unwinder->debug_offsets.interpreter_frame.executable);
|
|
int frame_valid = is_frame_valid(unwinder, (uintptr_t)frame, code_object);
|
|
if (frame_valid != 1) {
|
|
return frame_valid;
|
|
}
|
|
|
|
uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr);
|
|
|
|
// Get tlbc_index for free threading builds
|
|
int32_t tlbc_index = 0;
|
|
#ifdef Py_GIL_DISABLED
|
|
if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) {
|
|
tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index);
|
|
}
|
|
#endif
|
|
|
|
CodeObjectContext code_ctx = {
|
|
.code_addr = code_object,
|
|
.instruction_pointer = instruction_pointer,
|
|
.tlbc_index = tlbc_index,
|
|
};
|
|
return parse_code_object(unwinder, result, &code_ctx);
|
|
}
|
|
|
|
/* ============================================================================
|
|
* FRAME CHAIN PROCESSING
|
|
* ============================================================================ */
|
|
|
|
int
|
|
process_frame_chain(
|
|
RemoteUnwinderObject *unwinder,
|
|
FrameWalkContext *ctx)
|
|
{
|
|
uintptr_t frame_addr = ctx->frame_addr;
|
|
uintptr_t prev_frame_addr = 0;
|
|
uintptr_t last_frame_addr = 0;
|
|
const size_t MAX_FRAMES = 1024 + 512;
|
|
size_t frame_count = 0;
|
|
assert(MAX_FRAMES > 0 && MAX_FRAMES < 10000);
|
|
|
|
ctx->stopped_at_cached_frame = 0;
|
|
ctx->last_frame_visited = 0;
|
|
|
|
while ((void*)frame_addr != NULL) {
|
|
PyObject *frame = NULL;
|
|
uintptr_t next_frame_addr = 0;
|
|
uintptr_t stackpointer = 0;
|
|
last_frame_addr = frame_addr;
|
|
|
|
if (++frame_count > MAX_FRAMES) {
|
|
PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)");
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain iteration limit exceeded");
|
|
return -1;
|
|
}
|
|
assert(frame_count <= MAX_FRAMES);
|
|
|
|
if (ctx->chunks && ctx->chunks->count > 0) {
|
|
if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, &stackpointer, ctx->chunks) == 0) {
|
|
goto parsed_frame;
|
|
}
|
|
PyErr_Clear();
|
|
}
|
|
{
|
|
uintptr_t address_of_code_object = 0;
|
|
int parse_result;
|
|
if (ctx->prefetch.frame && ctx->prefetch.frame_addr == frame_addr) {
|
|
parse_result = parse_frame_buffer(
|
|
unwinder, &frame, ctx->prefetch.frame,
|
|
&address_of_code_object, &next_frame_addr);
|
|
}
|
|
else {
|
|
parse_result = parse_frame_object(
|
|
unwinder, &frame, frame_addr,
|
|
&address_of_code_object, &next_frame_addr);
|
|
}
|
|
if (parse_result < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to parse frame object in chain");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
parsed_frame:
|
|
// Skip first frame if requested (used for cache miss continuation)
|
|
if (ctx->skip_first_frame && frame_count == 1) {
|
|
Py_XDECREF(frame);
|
|
frame_addr = next_frame_addr;
|
|
continue;
|
|
}
|
|
|
|
if (frame == NULL && PyList_GET_SIZE(ctx->frame_info) == 0) {
|
|
const char *e = "Failed to parse initial frame in chain";
|
|
PyErr_SetString(PyExc_RuntimeError, e);
|
|
return -1;
|
|
}
|
|
PyObject *extra_frame = NULL;
|
|
if (unwinder->gc && frame_addr == ctx->gc_frame) {
|
|
_Py_DECLARE_STR(gc, "<GC>");
|
|
extra_frame = &_Py_STR(gc);
|
|
}
|
|
else if (unwinder->native &&
|
|
frame == NULL &&
|
|
next_frame_addr &&
|
|
!(unwinder->gc && next_frame_addr == ctx->gc_frame))
|
|
{
|
|
_Py_DECLARE_STR(native, "<native>");
|
|
extra_frame = &_Py_STR(native);
|
|
}
|
|
if (extra_frame) {
|
|
PyObject *extra_frame_info = make_frame_info(
|
|
unwinder, _Py_LATIN1_CHR('~'), Py_None, extra_frame, Py_None);
|
|
if (extra_frame_info == NULL) {
|
|
Py_XDECREF(frame);
|
|
return -1;
|
|
}
|
|
if (PyList_Append(ctx->frame_info, extra_frame_info) < 0) {
|
|
Py_DECREF(extra_frame_info);
|
|
Py_XDECREF(frame);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append extra frame");
|
|
return -1;
|
|
}
|
|
if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) {
|
|
assert(ctx->num_addrs >= 0);
|
|
ctx->frame_addrs[ctx->num_addrs++] = 0;
|
|
}
|
|
Py_DECREF(extra_frame_info);
|
|
}
|
|
if (frame) {
|
|
if (prev_frame_addr && frame_addr != prev_frame_addr) {
|
|
const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx";
|
|
PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr);
|
|
Py_DECREF(frame);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed");
|
|
return -1;
|
|
}
|
|
|
|
if (PyList_Append(ctx->frame_info, frame) < 0) {
|
|
Py_DECREF(frame);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame");
|
|
return -1;
|
|
}
|
|
if (ctx->frame_addrs && ctx->num_addrs < ctx->max_addrs) {
|
|
assert(ctx->num_addrs >= 0);
|
|
ctx->frame_addrs[ctx->num_addrs++] = frame_addr;
|
|
}
|
|
Py_DECREF(frame);
|
|
}
|
|
|
|
if (ctx->last_profiled.frame != 0 && frame_addr == ctx->last_profiled.frame) {
|
|
ctx->stopped_at_cached_frame = 1;
|
|
break;
|
|
}
|
|
|
|
prev_frame_addr = next_frame_addr;
|
|
frame_addr = next_frame_addr;
|
|
}
|
|
|
|
if (!ctx->stopped_at_cached_frame && ctx->base_frame_addr != 0 && last_frame_addr != ctx->base_frame_addr) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Incomplete sample: did not reach base frame (expected 0x%lx, got 0x%lx)",
|
|
ctx->base_frame_addr, last_frame_addr);
|
|
return -1;
|
|
}
|
|
|
|
ctx->last_frame_visited = last_frame_addr;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int
|
|
set_last_profiled_frame(RemoteUnwinderObject *unwinder, uintptr_t tstate_addr,
|
|
uintptr_t frame_addr)
|
|
{
|
|
uintptr_t lpf_addr = tstate_addr +
|
|
(uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame;
|
|
return _Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr,
|
|
sizeof(uintptr_t), &frame_addr);
|
|
}
|
|
|
|
// Clear the profiler anchor frame for all threads in the target process. The
|
|
// sequence is intentionally preserved: a zero frame disables cache lookup, and
|
|
// the next profiler-owned anchor should use the target's current generation.
|
|
int
|
|
clear_last_profiled_frames(RemoteUnwinderObject *unwinder)
|
|
{
|
|
uintptr_t current_interp = unwinder->interpreter_addr;
|
|
const size_t MAX_INTERPRETERS = 256;
|
|
size_t interp_count = 0;
|
|
|
|
while (current_interp != 0 && interp_count < MAX_INTERPRETERS) {
|
|
interp_count++;
|
|
// Get first thread in this interpreter
|
|
uintptr_t tstate_addr;
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
current_interp + unwinder->debug_offsets.interpreter_state.threads_head,
|
|
sizeof(void*),
|
|
&tstate_addr) < 0) {
|
|
// Non-fatal: just skip clearing
|
|
PyErr_Clear();
|
|
return 0;
|
|
}
|
|
|
|
// Iterate all threads in this interpreter
|
|
const size_t MAX_THREADS_PER_INTERP = 8192;
|
|
size_t thread_count = 0;
|
|
while (tstate_addr != 0 && thread_count < MAX_THREADS_PER_INTERP) {
|
|
thread_count++;
|
|
uintptr_t no_frame = 0;
|
|
if (set_last_profiled_frame(unwinder, tstate_addr, no_frame) < 0) {
|
|
PyErr_Clear();
|
|
}
|
|
|
|
// Move to next thread
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
tstate_addr + unwinder->debug_offsets.thread_state.next,
|
|
sizeof(void*),
|
|
&tstate_addr) < 0) {
|
|
PyErr_Clear();
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Move to next interpreter
|
|
if (_Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
current_interp + unwinder->debug_offsets.interpreter_state.next,
|
|
sizeof(void*),
|
|
¤t_interp) < 0) {
|
|
PyErr_Clear();
|
|
break;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Fast path: check if we have a full cache hit (parent stack unchanged)
|
|
// A "full hit" means current frame == last profiled frame, so we can reuse
|
|
// cached parent frames. We always read the current frame from memory to get
|
|
// updated line numbers (the line within a frame can change between samples).
|
|
// Returns: 1 if full hit (frame_info populated with current frame + cached parents),
|
|
// 0 if miss, -1 on error
|
|
static int
|
|
try_full_cache_hit(
|
|
RemoteUnwinderObject *unwinder,
|
|
const FrameWalkContext *ctx,
|
|
uint64_t thread_id)
|
|
{
|
|
if (!unwinder->frame_cache || ctx->last_profiled.frame == 0) {
|
|
return 0;
|
|
}
|
|
if (ctx->frame_addr != ctx->last_profiled.frame) {
|
|
return 0;
|
|
}
|
|
|
|
FrameCacheEntry *entry = frame_cache_find(unwinder, thread_id);
|
|
if (!entry || !entry->frame_list) {
|
|
return 0;
|
|
}
|
|
if (entry->thread_state_addr != ctx->thread_state_addr) {
|
|
return 0;
|
|
}
|
|
|
|
if (entry->num_addrs == 0 || entry->addrs[0] != ctx->frame_addr) {
|
|
return 0;
|
|
}
|
|
if (entry->last_profiled_frame_seq != ctx->last_profiled.seq) {
|
|
return 0;
|
|
}
|
|
|
|
PyObject *current_frame = NULL;
|
|
uintptr_t code_object_addr = 0;
|
|
uintptr_t previous_frame = 0;
|
|
int parse_result;
|
|
if (ctx->prefetch.frame && ctx->prefetch.frame_addr == ctx->frame_addr) {
|
|
parse_result = parse_frame_buffer(unwinder, ¤t_frame,
|
|
ctx->prefetch.frame,
|
|
&code_object_addr, &previous_frame);
|
|
}
|
|
else {
|
|
parse_result = parse_frame_object(unwinder, ¤t_frame, ctx->frame_addr,
|
|
&code_object_addr, &previous_frame);
|
|
}
|
|
if (parse_result < 0) {
|
|
return -1;
|
|
}
|
|
if (!frame_cache_anchor_matches(unwinder, ctx->thread_state_addr,
|
|
ctx->last_profiled)) {
|
|
Py_XDECREF(current_frame);
|
|
return 0;
|
|
}
|
|
|
|
if (current_frame != NULL) {
|
|
if (PyList_Append(ctx->frame_info, current_frame) < 0) {
|
|
Py_DECREF(current_frame);
|
|
return -1;
|
|
}
|
|
Py_DECREF(current_frame);
|
|
STATS_ADD(unwinder, frames_read_from_memory, 1);
|
|
}
|
|
|
|
Py_ssize_t cached_size = PyList_GET_SIZE(entry->frame_list);
|
|
for (Py_ssize_t i = 1; i < cached_size; i++) {
|
|
PyObject *cached_frame = PyList_GET_ITEM(entry->frame_list, i);
|
|
if (PyList_Append(ctx->frame_info, cached_frame) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
STATS_ADD(unwinder, frames_read_from_cache, cached_size > 1 ? cached_size - 1 : 0);
|
|
|
|
STATS_INC(unwinder, frame_cache_hits);
|
|
return 1;
|
|
}
|
|
|
|
// High-level helper: collect frames with cache optimization
|
|
// Returns complete frame_info list, handling all cache logic internally
|
|
int
|
|
collect_frames_with_cache(
|
|
RemoteUnwinderObject *unwinder,
|
|
FrameWalkContext *ctx,
|
|
uint64_t thread_id)
|
|
{
|
|
int full_hit = try_full_cache_hit(unwinder, ctx, thread_id);
|
|
if (full_hit != 0) {
|
|
return full_hit < 0 ? -1 : 0;
|
|
}
|
|
|
|
assert(ctx->chunks != NULL);
|
|
|
|
// Cache misses copy stack chunks before walking. Frames found there are
|
|
// parsed from a stable snapshot, which keeps moving stacks from seeding the
|
|
// cache with an impossible parent chain.
|
|
if (ctx->chunks->count == 0) {
|
|
if (copy_stack_chunks(unwinder, ctx->thread_state_addr, ctx->chunks) < 0) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
Py_ssize_t frames_before = PyList_GET_SIZE(ctx->frame_info);
|
|
|
|
if (process_frame_chain(unwinder, ctx) < 0) {
|
|
return -1;
|
|
}
|
|
|
|
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before);
|
|
|
|
if (ctx->stopped_at_cached_frame) {
|
|
Py_ssize_t frames_before_cache = PyList_GET_SIZE(ctx->frame_info);
|
|
int cache_result = frame_cache_lookup_and_extend(unwinder, thread_id,
|
|
ctx->thread_state_addr,
|
|
ctx->last_profiled,
|
|
ctx->frame_info, ctx->frame_addrs, &ctx->num_addrs,
|
|
ctx->max_addrs);
|
|
if (cache_result < 0) {
|
|
return -1;
|
|
}
|
|
if (cache_result == 0) {
|
|
STATS_INC(unwinder, frame_cache_misses);
|
|
|
|
// Continue walking from last_profiled_frame, skipping it (already processed)
|
|
Py_ssize_t frames_before_walk = PyList_GET_SIZE(ctx->frame_info);
|
|
FrameWalkContext continue_ctx = {
|
|
.frame_addr = ctx->last_profiled.frame,
|
|
.base_frame_addr = ctx->base_frame_addr,
|
|
.gc_frame = ctx->gc_frame,
|
|
.chunks = ctx->chunks,
|
|
.skip_first_frame = 1,
|
|
.frame_info = ctx->frame_info,
|
|
.frame_addrs = ctx->frame_addrs,
|
|
.num_addrs = ctx->num_addrs,
|
|
.max_addrs = ctx->max_addrs,
|
|
};
|
|
if (process_frame_chain(unwinder, &continue_ctx) < 0) {
|
|
return -1;
|
|
}
|
|
ctx->num_addrs = continue_ctx.num_addrs;
|
|
ctx->last_frame_visited = continue_ctx.last_frame_visited;
|
|
STATS_ADD(unwinder, frames_read_from_memory, PyList_GET_SIZE(ctx->frame_info) - frames_before_walk);
|
|
} else {
|
|
// Partial cache hit - cached stack was validated as complete when stored,
|
|
// so set last_frame_visited to base_frame_addr for validation in frame_cache_store
|
|
ctx->last_frame_visited = ctx->base_frame_addr;
|
|
STATS_INC(unwinder, frame_cache_partial_hits);
|
|
STATS_ADD(unwinder, frames_read_from_cache, PyList_GET_SIZE(ctx->frame_info) - frames_before_cache);
|
|
}
|
|
} else {
|
|
if (ctx->last_profiled.frame == 0) {
|
|
STATS_INC(unwinder, frame_cache_misses);
|
|
}
|
|
}
|
|
|
|
if (frame_cache_store(unwinder, thread_id, ctx->frame_info, ctx->frame_addrs,
|
|
ctx->num_addrs, ctx->thread_state_addr,
|
|
ctx->last_profiled.seq, ctx->base_frame_addr,
|
|
ctx->last_frame_visited) < 0) {
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|