skipping resources: ensure HEAD, OPTIONS, 206, and 304 response/request pairs are not written to WARC (#460)

Allows skipping network traffic that doesn't need to be stored, either
because it is not necessary or because storing it would result in
incorrect replay (e.g. a 304 being replayed instead of a 200).
This commit is contained in:
Ilya Kreymer 2024-01-17 14:27:51 -08:00 committed by GitHub
parent 2fc0f67f04
commit 18ffb3d971
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 21 additions and 2 deletions

View file

@ -912,8 +912,14 @@ export class Recorder {
}
async serializeToWARC(reqresp: RequestResponseInfo) {
if (!reqresp.payload) {
logNetwork("Not writing, no payload", { url: reqresp.url });
if (reqresp.shouldSkipSave()) {
const { url, method, status, payload } = reqresp;
logNetwork("Skipping request/response", {
url,
method,
status,
payloadLength: payload && payload.length,
});
return;
}

View file

@ -270,6 +270,19 @@ export class RequestResponseInfo {
return true;
}
/**
 * Decides whether this request/response pair should be left out of the
 * saved output.
 *
 * @returns `true` when there is no payload, when the request method is
 * OPTIONS or HEAD, or when the response status is 206 or 304; `false`
 * otherwise.
 */
shouldSkipSave() {
  // without a payload there is nothing to store
  if (!this.payload) {
    return true;
  }

  // OPTIONS / HEAD requests are never saved
  if (this.method === "OPTIONS" || this.method === "HEAD") {
    return true;
  }

  // partial (206) and not-modified (304) responses are never saved
  return this.status === 206 || this.status === 304;
}
getCanonURL(): string {
if (!this.method || this.method === "GET") {
return this.url;