This commit is contained in:
Maurycy Pawłowski-Wieroński 2026-05-04 01:31:39 +02:00 committed by GitHub
commit 93063c3251
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 47 additions and 29 deletions

View file

@ -61,11 +61,36 @@ extern "C" {
#define HDR_SIZE_COMPRESSION 4
#define FILE_HEADER_SIZE (HDR_OFF_COMPRESSION + HDR_SIZE_COMPRESSION)
#define FILE_HEADER_PLACEHOLDER_SIZE 64
#define SAMPLE_HEADER_FIXED_SIZE (sizeof(uint64_t) + sizeof(uint32_t) + 1)
static_assert(FILE_HEADER_SIZE <= FILE_HEADER_PLACEHOLDER_SIZE,
"FILE_HEADER_SIZE exceeds FILE_HEADER_PLACEHOLDER_SIZE");
/* Fixed-size part of a sample header.
 *
 * Fields are laid out back to back with no padding:
 *   offset  0, 8 bytes: thread id       (uint64_t)
 *   offset  8, 4 bytes: interpreter id  (uint32_t)
 *   offset 12, 1 byte : encoding        (uint8_t)
 *
 * Each field has a SMP_SIZE_* and a SMP_OFF_* macro; every offset is derived
 * from the offset and size of the field that precedes it, so the numbers
 * above follow from the definitions rather than being hard-coded. */

/* Field sizes */
#define SMP_SIZE_THREAD_ID (sizeof(uint64_t))
#define SMP_SIZE_INTERPRETER_ID (sizeof(uint32_t))
#define SMP_SIZE_ENCODING (sizeof(uint8_t))

/* Field offsets, relative to the first byte of the sample header */
#define SMP_OFF_THREAD_ID 0
#define SMP_OFF_INTERPRETER_ID (SMP_OFF_THREAD_ID + SMP_SIZE_THREAD_ID)
#define SMP_OFF_ENCODING (SMP_OFF_INTERPRETER_ID + SMP_SIZE_INTERPRETER_ID)

/* Total size: one past the last byte of the final field */
#define SAMPLE_HEADER_FIXED_SIZE (SMP_OFF_ENCODING + SMP_SIZE_ENCODING)

/* Compile-time guard: the total must stay at 13 bytes. */
static_assert(SAMPLE_HEADER_FIXED_SIZE == 13,
"SAMPLE_HEADER_FIXED_SIZE must remain 13");
/* File footer.
 *
 * Fields are laid out back to back with no padding:
 *   offset  0,  4 bytes: strings count  (uint32_t)
 *   offset  4,  4 bytes: frames count   (uint32_t)
 *   offset  8,  8 bytes: file size      (uint64_t)
 *   offset 16, 16 bytes: checksum       (2 x uint64_t)
 *
 * Each field has a FTR_SIZE_* and a FTR_OFF_* macro; every offset is derived
 * from the offset and size of the field that precedes it. */

/* Field sizes */
#define FTR_SIZE_STRINGS (sizeof(uint32_t))
#define FTR_SIZE_FRAMES (sizeof(uint32_t))
#define FTR_SIZE_FILE_SIZE (sizeof(uint64_t))
#define FTR_SIZE_CHECKSUM (2 * sizeof(uint64_t))

/* Field offsets, relative to the first byte of the footer */
#define FTR_OFF_STRINGS 0
#define FTR_OFF_FRAMES (FTR_OFF_STRINGS + FTR_SIZE_STRINGS)
#define FTR_OFF_FILE_SIZE (FTR_OFF_FRAMES + FTR_SIZE_FRAMES)
#define FTR_OFF_CHECKSUM (FTR_OFF_FILE_SIZE + FTR_SIZE_FILE_SIZE)

/* Total size: one past the last byte of the checksum field */
#define FILE_FOOTER_SIZE (FTR_OFF_CHECKSUM + FTR_SIZE_CHECKSUM)

/* Compile-time guard: the total must stay at 32 bytes. */
static_assert(FILE_FOOTER_SIZE == 32,
"FILE_FOOTER_SIZE must remain 32");
/* Buffer sizes: 512KB balances syscall amortization against memory use,
* and aligns well with filesystem block sizes and zstd dictionary windows */
#define WRITE_BUFFER_SIZE (512 * 1024)

View file

@ -23,15 +23,11 @@
* ============================================================================ */
/* File structure sizes */
#define FILE_FOOTER_SIZE 32
#define MIN_DECOMPRESS_BUFFER_SIZE (64 * 1024) /* Minimum decompression buffer */
/* Progress callback frequency */
#define PROGRESS_CALLBACK_INTERVAL 1000
/* Maximum decompression size limit (1GB) */
#define MAX_DECOMPRESS_SIZE (1ULL << 30)
/* ============================================================================
* BINARY READER IMPLEMENTATION
* ============================================================================ */
@ -47,8 +43,8 @@ reader_parse_header(BinaryReader *reader, const uint8_t *data, size_t file_size)
/* Use memcpy to avoid strict aliasing violations and unaligned access */
uint32_t magic;
uint32_t version;
memcpy(&magic, &data[0], sizeof(magic));
memcpy(&version, &data[4], sizeof(version));
memcpy(&magic, &data[HDR_OFF_MAGIC], HDR_SIZE_MAGIC);
memcpy(&version, &data[HDR_OFF_VERSION], HDR_SIZE_VERSION);
/* Detect endianness from magic number */
if (magic == BINARY_FORMAT_MAGIC) {
@ -119,8 +115,8 @@ reader_parse_footer(BinaryReader *reader, const uint8_t *data, size_t file_size)
const uint8_t *footer = data + file_size - FILE_FOOTER_SIZE;
/* Use memcpy to avoid strict aliasing violations */
uint32_t strings_count, frames_count;
memcpy(&strings_count, &footer[0], sizeof(strings_count));
memcpy(&frames_count, &footer[4], sizeof(frames_count));
memcpy(&strings_count, &footer[FTR_OFF_STRINGS], FTR_SIZE_STRINGS);
memcpy(&frames_count, &footer[FTR_OFF_FRAMES], FTR_SIZE_FRAMES);
reader->strings_count = SWAP32_IF(reader->needs_swap, strings_count);
reader->frames_count = SWAP32_IF(reader->needs_swap, frames_count);
@ -984,11 +980,11 @@ binary_reader_replay(BinaryReader *reader, PyObject *collector, PyObject *progre
/* Use memcpy to avoid strict aliasing violations, then byte-swap if needed */
uint64_t thread_id_raw;
uint32_t interpreter_id_raw;
memcpy(&thread_id_raw, &reader->sample_data[offset], sizeof(thread_id_raw));
offset += 8;
memcpy(&thread_id_raw, &reader->sample_data[offset], SMP_SIZE_THREAD_ID);
offset += SMP_SIZE_THREAD_ID;
memcpy(&interpreter_id_raw, &reader->sample_data[offset], sizeof(interpreter_id_raw));
offset += 4;
memcpy(&interpreter_id_raw, &reader->sample_data[offset], SMP_SIZE_INTERPRETER_ID);
offset += SMP_SIZE_INTERPRETER_ID;
uint64_t thread_id = SWAP64_IF(reader->needs_swap, thread_id_raw);
uint32_t interpreter_id = SWAP32_IF(reader->needs_swap, interpreter_id_raw);

View file

@ -29,9 +29,6 @@
/* Frame buffer: depth varint (max 2 bytes for 256) + 256 frames * 5 bytes/varint + margin */
#define MAX_FRAME_BUFFER_SIZE ((MAX_STACK_DEPTH * MAX_VARINT_SIZE_U32) + MAX_VARINT_SIZE_U32 + 16)
/* File structure sizes */
#define FILE_FOOTER_SIZE 32
/* Helper macro: convert PyLong to int32, using default_val if conversion fails */
#define PYLONG_TO_INT32_OR_DEFAULT(obj, var, default_val) \
do { \
@ -588,9 +585,9 @@ static inline int
write_sample_header(BinaryWriter *writer, ThreadEntry *entry, uint8_t encoding)
{
uint8_t header[SAMPLE_HEADER_FIXED_SIZE];
memcpy(header, &entry->thread_id, 8);
memcpy(header + 8, &entry->interpreter_id, 4);
header[12] = encoding;
memcpy(header + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
memcpy(header + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID);
header[SMP_OFF_ENCODING] = encoding;
return writer_write_bytes(writer, header, SAMPLE_HEADER_FIXED_SIZE);
}
@ -649,9 +646,9 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
{
/* Header: thread_id(8) + interpreter_id(4) + encoding(1) + delta(varint) + status(1) */
uint8_t header_buf[SAMPLE_HEADER_MAX_SIZE];
memcpy(header_buf, &entry->thread_id, 8);
memcpy(header_buf + 8, &entry->interpreter_id, 4);
header_buf[12] = (uint8_t)encoding_type;
memcpy(header_buf + SMP_OFF_THREAD_ID, &entry->thread_id, SMP_SIZE_THREAD_ID);
memcpy(header_buf + SMP_OFF_INTERPRETER_ID, &entry->interpreter_id, SMP_SIZE_INTERPRETER_ID);
header_buf[SMP_OFF_ENCODING] = (uint8_t)encoding_type;
size_t varint_len = encode_varint_u64(
header_buf + SAMPLE_HEADER_FIXED_SIZE,
timestamp_delta);
@ -1145,17 +1142,17 @@ binary_writer_finalize(BinaryWriter *writer)
PyErr_SetFromErrno(PyExc_IOError);
return -1;
}
uint64_t file_size = (uint64_t)footer_offset + 32;
uint8_t footer[32] = {0};
uint64_t file_size = (uint64_t)footer_offset + FILE_FOOTER_SIZE;
uint8_t footer[FILE_FOOTER_SIZE] = {0};
/* Cast size_t to uint32_t before memcpy to ensure correct bytes are copied
* on both little-endian and big-endian systems (size_t is 8 bytes on 64-bit) */
uint32_t string_count_u32 = (uint32_t)writer->string_count;
uint32_t frame_count_u32 = (uint32_t)writer->frame_count;
memcpy(footer + 0, &string_count_u32, 4);
memcpy(footer + 4, &frame_count_u32, 4);
memcpy(footer + 8, &file_size, 8);
/* bytes 16-31: checksum placeholder (zeros) */
if (fwrite_checked_allow_threads(footer, 32, writer->fp) < 0) {
memcpy(footer + FTR_OFF_STRINGS, &string_count_u32, FTR_SIZE_STRINGS);
memcpy(footer + FTR_OFF_FRAMES, &frame_count_u32, FTR_SIZE_FRAMES);
memcpy(footer + FTR_OFF_FILE_SIZE, &file_size, FTR_SIZE_FILE_SIZE);
/* checksum (FTR_OFF_CHECKSUM..FILE_FOOTER_SIZE-1): placeholder zeros */
if (fwrite_checked_allow_threads(footer, FILE_FOOTER_SIZE, writer->fp) < 0) {
return -1;
}