skipping resources: ensure HEAD, OPTIONS, 204, 206, and 304 response/request pairs are not written to WARC

This commit is contained in:
Ilya Kreymer 2024-01-16 18:31:03 -08:00
parent 2fc0f67f04
commit bc201be7f1
2 changed files with 21 additions and 2 deletions

View file

@ -912,8 +912,14 @@ export class Recorder {
} }
async serializeToWARC(reqresp: RequestResponseInfo) { async serializeToWARC(reqresp: RequestResponseInfo) {
if (!reqresp.payload) { if (reqresp.shouldSkip()) {
logNetwork("Not writing, no payload", { url: reqresp.url }); const { url, method, status, payload } = reqresp;
logNetwork("Skipping request/response", {
url,
method,
status,
payloadLength: payload && payload.length,
});
return; return;
} }

View file

@ -270,6 +270,19 @@ export class RequestResponseInfo {
return true; return true;
} }
/**
 * Reports whether this request/response pair should be skipped
 * rather than serialized.
 *
 * @returns `true` when there is no payload, when the method is
 *   OPTIONS or HEAD, or when the status is 204, 206 or 304;
 *   `false` otherwise.
 */
shouldSkip() {
  // Without a payload there is nothing to record.
  if (!this.payload) {
    return true;
  }

  // OPTIONS and HEAD exchanges are always skipped.
  if (this.method === "OPTIONS" || this.method === "HEAD") {
    return true;
  }

  // 204, 206 and 304 responses are skipped as well.
  switch (this.status) {
    case 204:
    case 206:
    case 304:
      return true;
    default:
      return false;
  }
}
getCanonURL(): string { getCanonURL(): string {
if (!this.method || this.method === "GET") { if (!this.method || this.method === "GET") {
return this.url; return this.url;