From bc201be7f152d8da7426a4d3fb3a9ae3e4659f1c Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Tue, 16 Jan 2024 18:31:03 -0800
Subject: [PATCH] skipping resources: ensure HEAD, OPTIONS, 204, 206, and 304 response/request pairs are not written to WARC

---
 src/util/recorder.ts | 10 ++++++++--
 src/util/reqresp.ts  | 13 +++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/util/recorder.ts b/src/util/recorder.ts
index 4dbf97d3..079acc2c 100644
--- a/src/util/recorder.ts
+++ b/src/util/recorder.ts
@@ -912,8 +912,14 @@ export class Recorder {
   }
 
   async serializeToWARC(reqresp: RequestResponseInfo) {
-    if (!reqresp.payload) {
-      logNetwork("Not writing, no payload", { url: reqresp.url });
+    if (reqresp.shouldSkip()) {
+      const { url, method, status, payload } = reqresp;
+      logNetwork("Skipping request/response", {
+        url,
+        method,
+        status,
+        payloadLength: payload && payload.length,
+      });
       return;
     }
 
diff --git a/src/util/reqresp.ts b/src/util/reqresp.ts
index 9e705ed0..8c0c316a 100644
--- a/src/util/reqresp.ts
+++ b/src/util/reqresp.ts
@@ -270,6 +270,19 @@ export class RequestResponseInfo {
     return true;
   }
 
+  shouldSkip() {
+    // skip OPTIONS/HEAD responses, and 304, 204 or 206 responses
+    if (
+      !this.payload ||
+      (this.method && ["OPTIONS", "HEAD"].includes(this.method)) ||
+      [204, 206, 304].includes(this.status)
+    ) {
+      return true;
+    }
+
+    return false;
+  }
+
   getCanonURL(): string {
     if (!this.method || this.method === "GET") {
       return this.url;