gh-144563: Fix remote debugging with duplicate libpython mappings from ctypes (#144595)

When _ctypes is imported, it may call dlopen on the libpython shared
library, causing the dynamic linker to load a second mapping of the
library into the process address space. The remote debugging code
iterates memory regions from low addresses upward and returns the first
mapping whose filename matches libpython. After _ctypes is imported, it
finds the dlopen'd copy first, but that copy's PyRuntime section was
never initialized, so reading debug offsets from it fails.

Fix this by validating each candidate PyRuntime address before accepting
it. The validation reads the first 8 bytes and checks for the "xdebugpy"
cookie that is only present in an initialized PyRuntime. Uninitialized
duplicate mappings will fail this check and be skipped, allowing the
search to continue to the real, initialized PyRuntime.
This commit is contained in:
Pablo Galindo Salgado 2026-02-10 10:04:50 +00:00 committed by GitHub
parent d2d245942e
commit 2c1ca6bb5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 99 additions and 14 deletions

View file

@ -516,6 +516,44 @@ def foo():
finally:
_cleanup_sockets(client_socket, server_socket)
@skip_if_not_supported
# Skipped on Linux when process_vm_readv support is not available.
@unittest.skipIf(
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,
"Test only runs on Linux with process_vm_readv support",
)
def test_self_trace_after_ctypes_import(self):
"""Test that RemoteUnwinder works on the same process after _ctypes import.

When _ctypes is imported, it may call dlopen on the libpython shared
library, creating a duplicate mapping in the process address space.
The remote debugging code must skip these uninitialized duplicate
mappings and find the real PyRuntime. See gh-144563.

The check runs in a child interpreter that constructs a RemoteUnwinder
for its own pid both before and after importing _ctypes; the test passes
when that child exits with status 0.
"""
# Run the test in a subprocess to avoid side effects
script = textwrap.dedent("""\
import os
import _remote_debugging
# Should work before _ctypes import
unwinder = _remote_debugging.RemoteUnwinder(os.getpid())
import _ctypes
# Should still work after _ctypes import (gh-144563)
unwinder = _remote_debugging.RemoteUnwinder(os.getpid())
""")
# Execute the script with the same interpreter that runs this test suite.
result = subprocess.run(
[sys.executable, "-c", script],
capture_output=True,
text=True,
timeout=SHORT_TIMEOUT,
)
# A non-zero exit status means the script raised; include the child's
# captured output in the failure message to make the cause visible.
self.assertEqual(
result.returncode, 0,
f"stdout: {result.stdout}\nstderr: {result.stderr}"
)
@skip_if_not_supported
@unittest.skipIf(
sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED,

View file

@ -0,0 +1,4 @@
Fix the interaction between the Tachyon profiler and :mod:`ctypes` (and other
modules that load the Python shared library, if present, into an independent
mapping), which was confusing the mechanism that loads the binary
information. Patch by Pablo Galindo.

View file

@ -18,7 +18,8 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
#ifdef MS_WINDOWS
// On Windows, search for asyncio debug in executable or DLL
address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio");
address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio",
NULL);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
@ -27,7 +28,8 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
}
#elif defined(__linux__) && HAVE_PROCESS_VM_READV
// On Linux, search for asyncio debug in executable or DLL
address = search_linux_map_for_section(handle, "AsyncioDebug", "python");
address = search_linux_map_for_section(handle, "AsyncioDebug", "python",
NULL);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
@ -36,10 +38,12 @@ _Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle)
}
#elif defined(__APPLE__) && TARGET_OS_OSX
// On macOS, try libpython first, then fall back to python
address = search_map_for_section(handle, "AsyncioDebug", "libpython");
address = search_map_for_section(handle, "AsyncioDebug", "libpython",
NULL);
if (address == 0) {
PyErr_Clear();
address = search_map_for_section(handle, "AsyncioDebug", "python");
address = search_map_for_section(handle, "AsyncioDebug", "python",
NULL);
}
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL

View file

@ -150,6 +150,31 @@ typedef struct {
Py_ssize_t page_size;
} proc_handle_t;
// Forward declaration: the cookie validation function below reads remote
// memory before the definition of this function appears in the file.
// The validator treats a return value of 0 as a successful read.
static int
_Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst);
// Optional callback to validate a candidate section address found during
// memory map searches. Returns 1 if the address is valid, 0 to skip it.
// This allows callers to filter out duplicate/stale mappings (e.g. from
// ctypes dlopen) whose sections were never initialized.
// The map search functions accept NULL in place of a validator, in which
// case the first candidate section address found is accepted as-is.
typedef int (*section_validator_t)(proc_handle_t *handle, uintptr_t address);
// Check whether the remote memory at `address` begins with _Py_Debug_Cookie.
//
// Returns 1 when the bytes read from the remote process equal the cookie.
// Returns 0 when `address` is 0, when the remote read fails, or when the
// bytes differ. A failed read clears the pending Python exception, so a
// rejected candidate leaves no error set by this function.
static int
_Py_RemoteDebug_ValidatePyRuntimeCookie(proc_handle_t *handle, uintptr_t address)
{
    // One byte shorter than the cookie object itself (presumably this drops
    // the terminating NUL of a string literal).
    char cookie[sizeof(_Py_Debug_Cookie) - 1];
    int matches = 0;

    if (address != 0) {
        if (_Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(cookie), cookie) == 0) {
            matches = (memcmp(cookie, _Py_Debug_Cookie, sizeof(cookie)) == 0);
        }
        else {
            // Unreadable candidate: discard the error and report "not valid".
            PyErr_Clear();
        }
    }
    return matches;
}
static void
_Py_RemoteDebug_FreePageCache(proc_handle_t *handle)
{
@ -509,7 +534,8 @@ pid_to_task(pid_t pid)
}
static uintptr_t
search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) {
search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr,
section_validator_t validator) {
mach_vm_address_t address = 0;
mach_vm_size_t size = 0;
mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t);
@ -561,7 +587,9 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s
if (strncmp(filename, substr, strlen(substr)) == 0) {
uintptr_t result = search_section_in_file(
secname, map_filename, address, size, proc_ref);
if (result != 0) {
if (result != 0
&& (validator == NULL || validator(handle, result)))
{
return result;
}
}
@ -678,7 +706,8 @@ search_elf_file_for_section(
}
static uintptr_t
search_linux_map_for_section(proc_handle_t *handle, const char* secname, const char* substr)
search_linux_map_for_section(proc_handle_t *handle, const char* secname, const char* substr,
section_validator_t validator)
{
char maps_file_path[64];
sprintf(maps_file_path, "/proc/%d/maps", handle->pid);
@ -753,9 +782,12 @@ search_linux_map_for_section(proc_handle_t *handle, const char* secname, const c
if (strstr(filename, substr)) {
retval = search_elf_file_for_section(handle, secname, start, path);
if (retval) {
if (retval
&& (validator == NULL || validator(handle, retval)))
{
break;
}
retval = 0;
}
}
@ -859,7 +891,8 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char*
static uintptr_t
search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) {
search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr,
section_validator_t validator) {
HANDLE hProcSnap;
do {
hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid);
@ -882,8 +915,11 @@ search_windows_map_for_section(proc_handle_t* handle, const char* secname, const
for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) {
// Look for either python executable or DLL
if (wcsstr(moduleEntry.szModule, substr)) {
runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname);
if (runtime_addr != NULL) {
void *candidate = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname);
if (candidate != NULL
&& (validator == NULL || validator(handle, (uintptr_t)candidate)))
{
runtime_addr = candidate;
break;
}
}
@ -904,7 +940,8 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle)
#ifdef MS_WINDOWS
// On Windows, search for 'python' in executable or DLL
address = search_windows_map_for_section(handle, "PyRuntime", L"python");
address = search_windows_map_for_section(handle, "PyRuntime", L"python",
_Py_RemoteDebug_ValidatePyRuntimeCookie);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
@ -915,7 +952,8 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle)
}
#elif defined(__linux__) && HAVE_PROCESS_VM_READV
// On Linux, search for 'python' in executable or DLL
address = search_linux_map_for_section(handle, "PyRuntime", "python");
address = search_linux_map_for_section(handle, "PyRuntime", "python",
_Py_RemoteDebug_ValidatePyRuntimeCookie);
if (address == 0) {
// Error out: 'python' substring covers both executable and DLL
PyObject *exc = PyErr_GetRaisedException();
@ -929,7 +967,8 @@ _Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle)
const char* candidates[] = {"libpython", "python", "Python", NULL};
for (const char** candidate = candidates; *candidate; candidate++) {
PyErr_Clear();
address = search_map_for_section(handle, "PyRuntime", *candidate);
address = search_map_for_section(handle, "PyRuntime", *candidate,
_Py_RemoteDebug_ValidatePyRuntimeCookie);
if (address != 0) {
break;
}