mirror of
https://github.com/python/cpython.git
synced 2026-05-05 01:51:01 +00:00
317 lines
11 KiB
C
317 lines
11 KiB
C
/******************************************************************************
 * Remote Debugging Module - Object Reading Functions
 *
 * This file contains functions for reading Python objects from remote
 * process memory, including strings, bytes, and integers.
 ******************************************************************************/
|
|
|
|
#include "_remote_debugging.h"
|
|
|
|
/* ============================================================================
 * MEMORY READING FUNCTIONS
 * ============================================================================ */
|
|
|
|
#define DEFINE_MEMORY_READER(type_name, c_type, error_msg) \
|
|
int \
|
|
read_##type_name(RemoteUnwinderObject *unwinder, uintptr_t address, c_type *result) \
|
|
{ \
|
|
int res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address, sizeof(c_type), result); \
|
|
if (res < 0) { \
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, error_msg); \
|
|
return -1; \
|
|
} \
|
|
return 0; \
|
|
}
|
|
|
|
DEFINE_MEMORY_READER(ptr, uintptr_t, "Failed to read pointer from remote memory")
|
|
DEFINE_MEMORY_READER(Py_ssize_t, Py_ssize_t, "Failed to read Py_ssize_t from remote memory")
|
|
DEFINE_MEMORY_READER(char, char, "Failed to read char from remote memory")
|
|
|
|
int
|
|
read_py_ptr(RemoteUnwinderObject *unwinder, uintptr_t address, uintptr_t *ptr_addr)
|
|
{
|
|
if (read_ptr(unwinder, address, ptr_addr)) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read Python pointer");
|
|
return -1;
|
|
}
|
|
*ptr_addr &= ~Py_TAG_BITS;
|
|
return 0;
|
|
}
|
|
|
|
/* ============================================================================
 * PYTHON OBJECT READING FUNCTIONS
 * ============================================================================ */
|
|
|
|
PyObject *
|
|
read_py_str(
|
|
RemoteUnwinderObject *unwinder,
|
|
uintptr_t address,
|
|
Py_ssize_t max_len
|
|
) {
|
|
// Read the entire PyUnicodeObject at once; for short strings the data
|
|
// is inline right after the header and we'll already have (some of) it.
|
|
char unicode_obj[SIZEOF_UNICODE_OBJ];
|
|
int res = _Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address,
|
|
SIZEOF_UNICODE_OBJ,
|
|
unicode_obj
|
|
);
|
|
if (res < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyUnicodeObject");
|
|
return NULL;
|
|
}
|
|
|
|
Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length);
|
|
if (len < 0 || len > max_len) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Invalid string length (%zd) at 0x%lx", len, address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid string length in remote Unicode object");
|
|
return NULL;
|
|
}
|
|
|
|
// Inspect state to pick the right data offset and character width.
|
|
// We rely on the remote process sharing this Python version's
|
|
// PyASCIIObject layout, the same assumption already used for `length`.
|
|
struct _PyUnicodeObject_state state = GET_MEMBER(
|
|
struct _PyUnicodeObject_state,
|
|
unicode_obj,
|
|
unwinder->debug_offsets.unicode_object.state);
|
|
|
|
if (!state.compact) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Cannot read non-compact Unicode object at 0x%lx", address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError,
|
|
"Legacy (non-compact) Unicode objects are not supported");
|
|
return NULL;
|
|
}
|
|
|
|
int kind = (int)state.kind;
|
|
Py_UCS4 max_char;
|
|
switch (kind) {
|
|
case PyUnicode_1BYTE_KIND:
|
|
max_char = state.ascii ? 0x7F : 0xFF;
|
|
break;
|
|
case PyUnicode_2BYTE_KIND:
|
|
max_char = 0xFFFF;
|
|
break;
|
|
case PyUnicode_4BYTE_KIND:
|
|
max_char = 0x10FFFF;
|
|
break;
|
|
default:
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Invalid Unicode kind %d at 0x%lx", kind, address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError,
|
|
"Invalid kind in remote Unicode object");
|
|
return NULL;
|
|
}
|
|
|
|
size_t header_size = state.ascii
|
|
? (size_t)unwinder->debug_offsets.unicode_object.asciiobject_size
|
|
: (size_t)unwinder->debug_offsets.unicode_object.compactunicodeobject_size;
|
|
|
|
// len * kind is bounded by max_len * 4 (kind <= 4, len <= max_len), so
|
|
// the multiplication can't overflow for any caller-sane max_len, but the
|
|
// explicit cap here keeps a corrupted remote `length` from later turning
|
|
// into a giant allocation.
|
|
size_t nbytes = (size_t)len * (size_t)kind;
|
|
if ((size_t)len > (SIZE_MAX / 4) || nbytes > (size_t)max_len * 4) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Implausible Unicode byte size %zu at 0x%lx", nbytes, address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError,
|
|
"Garbage byte size in remote Unicode object");
|
|
return NULL;
|
|
}
|
|
|
|
PyObject *result = PyUnicode_New(len, max_char);
|
|
if (result == NULL) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to allocate PyUnicode for remote string");
|
|
return NULL;
|
|
}
|
|
if (nbytes == 0) {
|
|
return result;
|
|
}
|
|
|
|
void *data = PyUnicode_DATA(result);
|
|
|
|
// Reuse data already present in the header read; only round-trip for
|
|
// whatever spills past it.
|
|
size_t inline_avail = (header_size < SIZEOF_UNICODE_OBJ)
|
|
? SIZEOF_UNICODE_OBJ - header_size
|
|
: 0;
|
|
size_t inline_bytes = nbytes < inline_avail ? nbytes : inline_avail;
|
|
if (inline_bytes > 0) {
|
|
memcpy(data, unicode_obj + header_size, inline_bytes);
|
|
}
|
|
|
|
if (nbytes > inline_bytes) {
|
|
res = _Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address + header_size + inline_bytes,
|
|
nbytes - inline_bytes,
|
|
(char *)data + inline_bytes);
|
|
if (res < 0) {
|
|
Py_DECREF(result);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read string data from remote memory");
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
PyObject *
|
|
read_py_bytes(
|
|
RemoteUnwinderObject *unwinder,
|
|
uintptr_t address,
|
|
Py_ssize_t max_len
|
|
) {
|
|
PyObject *result = NULL;
|
|
char *buf = NULL;
|
|
|
|
// Read the entire PyBytesObject at once
|
|
char bytes_obj[SIZEOF_BYTES_OBJ];
|
|
int res = _Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address,
|
|
SIZEOF_BYTES_OBJ,
|
|
bytes_obj
|
|
);
|
|
if (res < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyBytesObject");
|
|
goto err;
|
|
}
|
|
|
|
Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size);
|
|
if (len < 0 || len > max_len) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Invalid bytes length (%zd) at 0x%lx", len, address);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Invalid bytes length in remote bytes object");
|
|
return NULL;
|
|
}
|
|
|
|
buf = (char *)PyMem_RawMalloc(len+1);
|
|
if (buf == NULL) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate buffer for bytes reading");
|
|
return NULL;
|
|
}
|
|
|
|
size_t offset = (size_t)unwinder->debug_offsets.bytes_object.ob_sval;
|
|
res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf);
|
|
if (res < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read bytes data from remote memory");
|
|
goto err;
|
|
}
|
|
buf[len] = '\0';
|
|
|
|
result = PyBytes_FromStringAndSize(buf, len);
|
|
if (result == NULL) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create PyBytes from remote bytes data");
|
|
goto err;
|
|
}
|
|
|
|
PyMem_RawFree(buf);
|
|
assert(result != NULL);
|
|
return result;
|
|
|
|
err:
|
|
if (buf != NULL) {
|
|
PyMem_RawFree(buf);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
long
|
|
read_py_long(
|
|
RemoteUnwinderObject *unwinder,
|
|
uintptr_t address
|
|
)
|
|
{
|
|
unsigned int shift = PYLONG_BITS_IN_DIGIT;
|
|
|
|
// Read the entire PyLongObject at once
|
|
char long_obj[SIZEOF_LONG_OBJ];
|
|
int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address,
|
|
(size_t)unwinder->debug_offsets.long_object.size,
|
|
long_obj);
|
|
if (bytes_read < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLongObject");
|
|
return -1;
|
|
}
|
|
|
|
uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag);
|
|
int negative = (lv_tag & 3) == 2;
|
|
Py_ssize_t size = lv_tag >> 3;
|
|
|
|
if (size == 0) {
|
|
return 0;
|
|
}
|
|
|
|
// Validate size: reject garbage (negative or unreasonably large)
|
|
if (size < 0 || size > MAX_LONG_DIGITS) {
|
|
PyErr_Format(PyExc_RuntimeError,
|
|
"Invalid PyLong digit count: %zd (expected 0-%d)", size, MAX_LONG_DIGITS);
|
|
set_exception_cause(unwinder, PyExc_RuntimeError,
|
|
"Invalid PyLong size (corrupted remote memory)");
|
|
return -1;
|
|
}
|
|
|
|
// Calculate how many digits fit inline in our local buffer
|
|
Py_ssize_t ob_digit_offset = unwinder->debug_offsets.long_object.ob_digit;
|
|
Py_ssize_t inline_digits_space = SIZEOF_LONG_OBJ - ob_digit_offset;
|
|
Py_ssize_t max_inline_digits = inline_digits_space / (Py_ssize_t)sizeof(digit);
|
|
|
|
// If the long object has inline digits that fit in our buffer, use them directly
|
|
digit *digits;
|
|
if (size <= max_inline_digits && size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) {
|
|
// For small integers, digits are inline in the long_value.ob_digit array
|
|
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
|
|
if (!digits) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for small PyLong");
|
|
return -1;
|
|
}
|
|
memcpy(digits, long_obj + ob_digit_offset, size * sizeof(digit));
|
|
} else {
|
|
// For larger integers, we need to read the digits separately
|
|
digits = (digit *)PyMem_RawMalloc(size * sizeof(digit));
|
|
if (!digits) {
|
|
PyErr_NoMemory();
|
|
set_exception_cause(unwinder, PyExc_MemoryError, "Failed to allocate digits for large PyLong");
|
|
return -1;
|
|
}
|
|
|
|
bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory(
|
|
&unwinder->handle,
|
|
address + (uintptr_t)unwinder->debug_offsets.long_object.ob_digit,
|
|
sizeof(digit) * size,
|
|
digits
|
|
);
|
|
if (bytes_read < 0) {
|
|
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read PyLong digits from remote memory");
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
long long value = 0;
|
|
|
|
// In theory this can overflow, but because of llvm/llvm-project#16778
|
|
// we can't use __builtin_mul_overflow because it fails to link with
|
|
// __muloti4 on aarch64. In practice this is fine because all we're
|
|
// testing here are task numbers that would fit in a single byte.
|
|
for (Py_ssize_t i = 0; i < size; ++i) {
|
|
long long factor = digits[i] * (1UL << (Py_ssize_t)(shift * i));
|
|
value += factor;
|
|
}
|
|
PyMem_RawFree(digits);
|
|
if (negative) {
|
|
value *= -1;
|
|
}
|
|
return (long)value;
|
|
error:
|
|
PyMem_RawFree(digits);
|
|
return -1;
|
|
}
|