remove early serialization which may result in missing WARC-Protocol and security metadata (#844)

- drop early serialization in handleFetchResponse(), which can result in
writing the WARC record too early, before the WARC-Protocol and other data
are available. (It was added previously for requests loaded via browser
context / service worker, which did not get a 'loadingFinished' message, but
these will now still be closed in awaitPageResources().)
- don't log the 'Skipping URL from unknown frame' warning, since it is often
spurious: the frame can be added in a subsequent message, and the response is
*not* skipped.
This commit is contained in:
Ilya Kreymer 2025-05-29 08:33:30 -07:00 committed by GitHub
parent 7bf10f7f18
commit 178b10a37f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 0 additions and 18 deletions

View file

@ -577,12 +577,6 @@ export class Browser {
} }
if (!foundRecorder) { if (!foundRecorder) {
logger.warn(
"Skipping URL from unknown frame",
{ url, frameId },
"recorder",
);
try { try {
await this.firstCDP.send("Fetch.continueResponse", { requestId }); await this.firstCDP.send("Fetch.continueResponse", { requestId });
} catch (e) { } catch (e) {

View file

@ -804,18 +804,6 @@ export class Recorder extends EventEmitter {
const rewritten = await this.rewriteResponse(reqresp, mimeType); const rewritten = await this.rewriteResponse(reqresp, mimeType);
// if in browser context, and not also intercepted in page context
// serialize here, as won't be getting a loadingFinished message for it
if (
isBrowserContext &&
!reqresp.inPageContext &&
!reqresp.asyncLoading &&
reqresp.payload
) {
this.removeReqResp(networkId);
await this.serializeToWARC(reqresp);
}
// not rewritten, and not streaming, return false to continue // not rewritten, and not streaming, return false to continue
if (!rewritten && !streamingConsume) { if (!rewritten && !streamingConsume) {
if (!reqresp.payload) { if (!reqresp.payload) {