/******************************************************************************
 * Remote Debugging Module - Thread Functions
 *
 * This file contains functions for iterating threads and determining
 * thread status in remote process memory.
 ******************************************************************************/

#include "_remote_debugging.h"

/* NOTE(review): the header names below were missing from the directives and
 * have been restored from the calls made in this file (read/close,
 * opendir/readdir, ptrace, waitpid) -- confirm against the upstream file. */
#ifndef MS_WINDOWS
#include <unistd.h>
#endif

#ifdef __linux__
#include <dirent.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#endif

/* ============================================================================
 * THREAD ITERATION FUNCTIONS
 * ============================================================================ */

/*
 * Walk the remote interpreter's linked list of thread states and invoke
 * `processor` once per thread.
 *
 * The walk starts at the pointer stored at
 * `interpreter_addr + interpreter_state.threads_main` and follows each
 * thread state's `next` pointer until it reads 0 or MAX_THREADS entries have
 * been visited (the cap bounds the walk if the remote list is corrupt or
 * cyclic; hitting it is not reported as an error).
 *
 * Parameters:
 *   unwinder  - supplies the remote handle, interpreter address and offsets.
 *   processor - callback receiving (unwinder, thread state address, native
 *               thread id, context); a negative return aborts the walk.
 *   context   - opaque pointer forwarded to `processor`.
 *
 * Returns 0 on success. Returns -1 if a remote read fails (after calling
 * set_exception_cause) or if `processor` returns a negative value.
 */
int
iterate_threads(
    RemoteUnwinderObject *unwinder,
    thread_processor_func processor,
    void *context
) {
    uintptr_t thread_state_addr;
    unsigned long tid = 0;

    const size_t MAX_THREADS = 8192;
    size_t thread_count = 0;

    if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
                &unwinder->handle,
                unwinder->interpreter_addr + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main,
                sizeof(void*),
                &thread_state_addr))
    {
        set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state");
        return -1;
    }

    while (thread_state_addr != 0 && thread_count < MAX_THREADS) {
        thread_count++;
        if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
                    &unwinder->handle,
                    thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.native_thread_id,
                    sizeof(tid),
                    &tid))
        {
            set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread ID");
            return -1;
        }

        // Call the processor function for this thread
        if (processor(unwinder, thread_state_addr, tid, context) < 0) {
            return -1;
        }

        // Move to next thread
        if (0 > _Py_RemoteDebug_PagedReadRemoteMemory(
                    &unwinder->handle,
                    thread_state_addr + (uintptr_t)unwinder->debug_offsets.thread_state.next,
                    sizeof(void*),
                    &thread_state_addr))
        {
            set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read next thread state");
            return -1;
        }
    }

    return 0;
}

/*
============================================================================ * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS * ============================================================================ */ int populate_initial_state_data( int all_threads, RemoteUnwinderObject *unwinder, uintptr_t runtime_start_address, uintptr_t *interpreter_state, uintptr_t *tstate ) { uintptr_t interpreter_state_list_head = (uintptr_t)unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read interpreter state address"); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); set_exception_cause(unwinder, PyExc_RuntimeError, "Interpreter state is NULL"); return -1; } *interpreter_state = address_of_interpreter_state; if (all_threads) { *tstate = 0; return 0; } uintptr_t address_of_thread = address_of_interpreter_state + (uintptr_t)unwinder->debug_offsets.interpreter_state.threads_main; if (_Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, address_of_thread, sizeof(void*), tstate) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read main thread state address"); return -1; } return 0; } int find_running_frame( RemoteUnwinderObject *unwinder, uintptr_t address_of_thread, uintptr_t *frame ) { if ((void*)address_of_thread != NULL) { int err = read_ptr( unwinder, address_of_thread + (uintptr_t)unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read current frame pointer"); return -1; } return 0; } *frame = (uintptr_t)NULL; return 0; } /* ============================================================================ * 
THREAD STATUS FUNCTIONS * ============================================================================ */ int get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) { #if defined(__APPLE__) && TARGET_OS_OSX if (unwinder->thread_id_offset == 0) { uint64_t *tids = (uint64_t *)PyMem_Malloc(MAX_NATIVE_THREADS * sizeof(uint64_t)); if (!tids) { PyErr_NoMemory(); return -1; } int n = proc_pidinfo(unwinder->handle.pid, PROC_PIDLISTTHREADS, 0, tids, MAX_NATIVE_THREADS * sizeof(uint64_t)) / sizeof(uint64_t); if (n <= 0) { PyMem_Free(tids); return THREAD_STATE_UNKNOWN; } uint64_t min_offset = UINT64_MAX; for (int i = 0; i < n; i++) { uint64_t offset = tids[i] - pthread_id; if (offset < min_offset) { min_offset = offset; } } unwinder->thread_id_offset = min_offset; PyMem_Free(tids); } struct proc_threadinfo ti; uint64_t tid_with_offset = pthread_id + unwinder->thread_id_offset; if (proc_pidinfo(unwinder->handle.pid, PROC_PIDTHREADINFO, tid_with_offset, &ti, sizeof(ti)) != sizeof(ti)) { return THREAD_STATE_UNKNOWN; } if (ti.pth_run_state == TH_STATE_RUNNING) { return THREAD_STATE_RUNNING; } return THREAD_STATE_IDLE; #elif defined(__linux__) char stat_path[256]; char buffer[2048] = ""; snprintf(stat_path, sizeof(stat_path), "/proc/%d/task/%lu/stat", unwinder->handle.pid, tid); int fd = open(stat_path, O_RDONLY); if (fd == -1) { return THREAD_STATE_UNKNOWN; } if (read(fd, buffer, 2047) == 0) { close(fd); return THREAD_STATE_UNKNOWN; } close(fd); char *p = strchr(buffer, ')'); if (!p) { return THREAD_STATE_UNKNOWN; } p += 2; // Skip ") " if (*p == ' ') { p++; } switch (*p) { case 'R': // Running return THREAD_STATE_RUNNING; case 'S': // Interruptible sleep case 'D': // Uninterruptible sleep case 'T': // Stopped case 'Z': // Zombie case 'I': // Idle kernel thread return THREAD_STATE_IDLE; default: return THREAD_STATE_UNKNOWN; } #elif defined(MS_WINDOWS) ULONG n; NTSTATUS status = NtQuerySystemInformation( SystemProcessInformation, 
unwinder->win_process_buffer, unwinder->win_process_buffer_size, &n ); if (status == STATUS_INFO_LENGTH_MISMATCH) { // Buffer was too small so we reallocate a larger one and try again. unwinder->win_process_buffer_size = n; PVOID new_buffer = PyMem_Realloc(unwinder->win_process_buffer, n); if (!new_buffer) { return -1; } unwinder->win_process_buffer = new_buffer; return get_thread_status(unwinder, tid, pthread_id); } if (status != STATUS_SUCCESS) { return -1; } SYSTEM_PROCESS_INFORMATION *pi = (SYSTEM_PROCESS_INFORMATION *)unwinder->win_process_buffer; while ((ULONG)(ULONG_PTR)pi->UniqueProcessId != unwinder->handle.pid) { if (pi->NextEntryOffset == 0) { // We didn't find the process return -1; } pi = (SYSTEM_PROCESS_INFORMATION *)(((BYTE *)pi) + pi->NextEntryOffset); } SYSTEM_THREAD_INFORMATION *ti = (SYSTEM_THREAD_INFORMATION *)((char *)pi + sizeof(SYSTEM_PROCESS_INFORMATION)); for (size_t i = 0; i < pi->NumberOfThreads; i++, ti++) { if (ti->ClientId.UniqueThread == (HANDLE)tid) { return ti->ThreadState != WIN32_THREADSTATE_RUNNING ? 
THREAD_STATE_IDLE : THREAD_STATE_RUNNING; } } return -1; #else return THREAD_STATE_UNKNOWN; #endif } /* ============================================================================ * STACK UNWINDING FUNCTIONS * ============================================================================ */ typedef struct { unsigned int initialized:1; unsigned int bound:1; unsigned int unbound:1; unsigned int bound_gilstate:1; unsigned int active:1; unsigned int finalizing:1; unsigned int cleared:1; unsigned int finalized:1; unsigned int :24; } _thread_status; PyObject* unwind_stack_for_thread( RemoteUnwinderObject *unwinder, uintptr_t *current_tstate, uintptr_t gil_holder_tstate, uintptr_t gc_frame ) { PyObject *frame_info = NULL; PyObject *thread_id = NULL; PyObject *result = NULL; StackChunkList chunks = {0}; char ts[SIZEOF_THREAD_STATE]; int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( &unwinder->handle, *current_tstate, (size_t)unwinder->debug_offsets.thread_state.size, ts); if (bytes_read < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read thread state"); goto error; } STATS_INC(unwinder, memory_reads); STATS_ADD(unwinder, memory_bytes_read, unwinder->debug_offsets.thread_state.size); long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id); // Read GC collecting state from the interpreter (before any skip checks) uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp); // Read the GC runtime state from the interpreter state uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc; char gc_state[SIZEOF_GC_RUNTIME_STATE]; if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state"); goto error; } STATS_INC(unwinder, memory_reads); STATS_ADD(unwinder, memory_bytes_read, unwinder->debug_offsets.gc.size); // Calculate thread 
status using flags (always) int status_flags = 0; // Check GIL status int has_gil = 0; int gil_requested = 0; #ifdef Py_GIL_DISABLED int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active; has_gil = active; (void)gil_requested; // unused #else // Read holds_gil directly from thread state has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil); // Check if thread is actively requesting the GIL if (unwinder->debug_offsets.thread_state.gil_requested != 0) { gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested); } // Set GIL_REQUESTED flag if thread is waiting if (!has_gil && gil_requested) { status_flags |= THREAD_STATUS_GIL_REQUESTED; } #endif if (has_gil) { status_flags |= THREAD_STATUS_HAS_GIL; // gh-142207 for remote debugging. gil_requested = 0; } // Check exception state (both raised and handled exceptions) int has_exception = 0; // Check current_exception (exception being raised/propagated) uintptr_t current_exception = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_exception); if (current_exception != 0) { has_exception = 1; } // Check exc_state.exc_value (exception being handled in except block) // exc_state is embedded in PyThreadState, so we read it directly from // the thread state buffer. This catches most cases; nested exception // handlers where exc_info points elsewhere are rare. 
if (!has_exception) { uintptr_t exc_value = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.exc_state + unwinder->debug_offsets.err_stackitem.exc_value); if (exc_value != 0) { has_exception = 1; } } if (has_exception) { status_flags |= THREAD_STATUS_HAS_EXCEPTION; } // Check CPU status long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id); // Optimization: only check CPU status if needed by mode because it's expensive int cpu_status = -1; if (unwinder->mode == PROFILING_MODE_CPU || unwinder->mode == PROFILING_MODE_ALL) { cpu_status = get_thread_status(unwinder, tid, pthread_id); } if (cpu_status == -1) { status_flags |= THREAD_STATUS_UNKNOWN; } else if (cpu_status == THREAD_STATE_RUNNING) { status_flags |= THREAD_STATUS_ON_CPU; } // Check if we should skip this thread based on mode int should_skip = 0; if (unwinder->skip_non_matching_threads) { if (unwinder->mode == PROFILING_MODE_CPU) { // Skip if not on CPU should_skip = !(status_flags & THREAD_STATUS_ON_CPU); } else if (unwinder->mode == PROFILING_MODE_GIL) { // Skip if doesn't have GIL should_skip = !(status_flags & THREAD_STATUS_HAS_GIL); } else if (unwinder->mode == PROFILING_MODE_EXCEPTION) { // Skip if thread doesn't have an exception active should_skip = !(status_flags & THREAD_STATUS_HAS_EXCEPTION); } // PROFILING_MODE_WALL and PROFILING_MODE_ALL never skip } if (should_skip) { // Advance to next thread and return NULL to skip processing *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); return NULL; } uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); uintptr_t base_frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.base_frame); frame_info = PyList_New(0); if (!frame_info) { set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create frame info list"); goto error; } // In cache mode, copying stack chunks is more expensive than direct 
memory reads if (!unwinder->cache_frames) { if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to copy stack chunks"); goto error; } } uintptr_t addrs[FRAME_CACHE_MAX_FRAMES]; FrameWalkContext ctx = { .frame_addr = frame_addr, .base_frame_addr = base_frame_addr, .gc_frame = gc_frame, .chunks = &chunks, .frame_info = frame_info, .frame_addrs = addrs, .num_addrs = 0, .max_addrs = FRAME_CACHE_MAX_FRAMES, }; assert(ctx.max_addrs == FRAME_CACHE_MAX_FRAMES); if (unwinder->cache_frames) { // Use cache to avoid re-reading unchanged parent frames ctx.last_profiled_frame = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.last_profiled_frame); if (collect_frames_with_cache(unwinder, &ctx, tid) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to collect frames"); goto error; } // Update last_profiled_frame for next sample uintptr_t lpf_addr = *current_tstate + (uintptr_t)unwinder->debug_offsets.thread_state.last_profiled_frame; if (_Py_RemoteDebug_WriteRemoteMemory(&unwinder->handle, lpf_addr, sizeof(uintptr_t), &frame_addr) < 0) { PyErr_Clear(); // Non-fatal } } else { // No caching - process entire frame chain with base_frame validation if (process_frame_chain(unwinder, &ctx) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } } *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); thread_id = PyLong_FromLongLong(tid); if (thread_id == NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID"); goto error; } RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); result = PyStructSequence_New(state->ThreadInfo_Type); if (result == NULL) { set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create ThreadInfo"); goto error; } // Always use status_flags PyObject *py_status = PyLong_FromLong(status_flags); if (py_status == 
NULL) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status"); goto error; } // py_status contains status flags (bitfield) PyStructSequence_SetItem(result, 0, thread_id); PyStructSequence_SetItem(result, 1, py_status); // Steals reference PyStructSequence_SetItem(result, 2, frame_info); // Steals reference cleanup_stack_chunks(&chunks); return result; error: Py_XDECREF(frame_info); Py_XDECREF(thread_id); Py_XDECREF(result); cleanup_stack_chunks(&chunks); return NULL; } /* ============================================================================ * PROCESS STOP FUNCTIONS * ============================================================================ */ #if defined(__APPLE__) && TARGET_OS_OSX void _Py_RemoteDebug_InitThreadsState(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { st->task = MACH_PORT_NULL; st->suspended = 0; } int _Py_RemoteDebug_StopAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { kern_return_t kr = task_suspend(unwinder->handle.task); if (kr != KERN_SUCCESS) { if (kr == MACH_SEND_INVALID_DEST) { PyErr_Format(PyExc_ProcessLookupError, "Process %d has terminated", unwinder->handle.pid); } else { PyErr_Format(PyExc_RuntimeError, "task_suspend failed for PID %d: kern_return_t %d", unwinder->handle.pid, kr); } return -1; } st->task = unwinder->handle.task; st->suspended = 1; _Py_RemoteDebug_ClearCache(&unwinder->handle); return 0; } void _Py_RemoteDebug_ResumeAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { if (!st->suspended || st->task == MACH_PORT_NULL) { return; } task_resume(st->task); st->task = MACH_PORT_NULL; st->suspended = 0; } #elif defined(__linux__) void _Py_RemoteDebug_InitThreadsState(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { st->tids = NULL; st->count = 0; } static int read_thread_ids(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { char task_path[64]; snprintf(task_path, 
sizeof(task_path), "/proc/%d/task", unwinder->handle.pid); DIR *dir = opendir(task_path); if (dir == NULL) { st->tids = NULL; st->count = 0; if (errno == ENOENT || errno == ESRCH) { PyErr_Format(PyExc_ProcessLookupError, "Process %d has terminated", unwinder->handle.pid); } else { PyErr_SetFromErrnoWithFilename(PyExc_OSError, task_path); } return -1; } st->count = 0; struct dirent *entry; while ((entry = readdir(dir)) != NULL) { if (entry->d_name[0] < '1' || entry->d_name[0] > '9') { continue; } char *endptr; long tid = strtol(entry->d_name, &endptr, 10); if (*endptr != '\0' || tid <= 0) { continue; } if (st->count >= unwinder->thread_tids_capacity) { size_t new_cap = unwinder->thread_tids_capacity == 0 ? 64 : unwinder->thread_tids_capacity * 2; pid_t *new_tids = PyMem_RawRealloc(unwinder->thread_tids, new_cap * sizeof(pid_t)); if (new_tids == NULL) { closedir(dir); st->tids = NULL; st->count = 0; PyErr_NoMemory(); return -1; } unwinder->thread_tids = new_tids; unwinder->thread_tids_capacity = new_cap; } unwinder->thread_tids[st->count++] = (pid_t)tid; } st->tids = unwinder->thread_tids; closedir(dir); return 0; } static inline void detach_threads(_Py_RemoteDebug_ThreadsState *st, size_t up_to) { for (size_t j = 0; j < up_to; j++) { ptrace(PTRACE_DETACH, st->tids[j], NULL, NULL); } } static int seize_thread(pid_t tid) { if (ptrace(PTRACE_SEIZE, tid, NULL, 0) == 0) { return 0; } if (errno == ESRCH) { return 1; // Thread gone, skip } if (errno == EINVAL || errno == EIO) { // Fallback for older kernels if (ptrace(PTRACE_ATTACH, tid, NULL, NULL) == 0) { int status; waitpid(tid, &status, __WALL); return 0; } if (errno == ESRCH) { return 1; // Thread gone } } return -1; // Real error } int _Py_RemoteDebug_StopAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { if (read_thread_ids(unwinder, st) < 0) { return -1; } for (size_t i = 0; i < st->count; i++) { pid_t tid = st->tids[i]; int ret = seize_thread(tid); if (ret == 1) { continue; // Thread 
gone, skip } if (ret < 0) { detach_threads(st, i); PyErr_Format(PyExc_RuntimeError, "Failed to seize thread %d: %s", tid, strerror(errno)); st->tids = NULL; st->count = 0; return -1; } if (ptrace(PTRACE_INTERRUPT, tid, NULL, NULL) == -1 && errno != ESRCH) { detach_threads(st, i + 1); PyErr_Format(PyExc_RuntimeError, "Failed to interrupt thread %d: %s", tid, strerror(errno)); st->tids = NULL; st->count = 0; return -1; } int status; if (waitpid(tid, &status, __WALL) == -1 && errno != ECHILD && errno != ESRCH) { detach_threads(st, i + 1); PyErr_Format(PyExc_RuntimeError, "waitpid failed for thread %d: %s", tid, strerror(errno)); st->tids = NULL; st->count = 0; return -1; } } return 0; } void _Py_RemoteDebug_ResumeAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { if (st->tids == NULL || st->count == 0) { return; } detach_threads(st, st->count); st->tids = NULL; st->count = 0; } #elif defined(MS_WINDOWS) void _Py_RemoteDebug_InitThreadsState(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { st->hProcess = NULL; st->suspended = 0; } int _Py_RemoteDebug_StopAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { static NtSuspendProcessFunc pNtSuspendProcess = NULL; static int tried_load = 0; if (!tried_load) { HMODULE hNtdll = GetModuleHandleW(L"ntdll.dll"); if (hNtdll) { pNtSuspendProcess = (NtSuspendProcessFunc)GetProcAddress(hNtdll, "NtSuspendProcess"); } tried_load = 1; } if (pNtSuspendProcess == NULL) { PyErr_SetString(PyExc_RuntimeError, "NtSuspendProcess not available"); return -1; } NTSTATUS status = pNtSuspendProcess(unwinder->handle.hProcess); if (status >= 0) { st->hProcess = unwinder->handle.hProcess; st->suspended = 1; _Py_RemoteDebug_ClearCache(&unwinder->handle); return 0; } PyErr_Format(PyExc_RuntimeError, "NtSuspendProcess failed: 0x%lx", status); return -1; } void _Py_RemoteDebug_ResumeAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { if (!st->suspended || 
st->hProcess == NULL) { return; } static NtResumeProcessFunc pNtResumeProcess = NULL; static int tried_load = 0; if (!tried_load) { HMODULE hNtdll = GetModuleHandleW(L"ntdll.dll"); if (hNtdll) { pNtResumeProcess = (NtResumeProcessFunc)GetProcAddress(hNtdll, "NtResumeProcess"); } tried_load = 1; } if (pNtResumeProcess != NULL) { pNtResumeProcess(st->hProcess); } st->hProcess = NULL; st->suspended = 0; } #else void _Py_RemoteDebug_InitThreadsState(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { (void)unwinder; (void)st; } int _Py_RemoteDebug_StopAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { (void)unwinder; (void)st; return 0; } void _Py_RemoteDebug_ResumeAllThreads(RemoteUnwinderObject *unwinder, _Py_RemoteDebug_ThreadsState *st) { (void)unwinder; (void)st; } #endif