mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
remove early serialization which may result in missing WARC-Protocol and security metadata (#844)
- Drop early serialization in handleFetchResponse(), which can result in writing the WARC record too early, before the WARC-Protocol and other data are available. (It was added previously for requests loaded via browser context / service worker, which did not get a 'loadingFinished' message, but these will now still be closed in awaitPageResources().)
- Don't log the 'skipping URL from unknown frame' warning, since it is often spurious: the frame can be added in a subsequent message, and the response is *not* skipped.
This commit is contained in:
parent
7bf10f7f18
commit
178b10a37f
2 changed files with 0 additions and 18 deletions
|
@ -577,12 +577,6 @@ export class Browser {
|
|||
}
|
||||
|
||||
if (!foundRecorder) {
|
||||
logger.warn(
|
||||
"Skipping URL from unknown frame",
|
||||
{ url, frameId },
|
||||
"recorder",
|
||||
);
|
||||
|
||||
try {
|
||||
await this.firstCDP.send("Fetch.continueResponse", { requestId });
|
||||
} catch (e) {
|
||||
|
|
|
@ -804,18 +804,6 @@ export class Recorder extends EventEmitter {
|
|||
|
||||
const rewritten = await this.rewriteResponse(reqresp, mimeType);
|
||||
|
||||
// if in browser context, and not also intercepted in page context
|
||||
// serialize here, as won't be getting a loadingFinished message for it
|
||||
if (
|
||||
isBrowserContext &&
|
||||
!reqresp.inPageContext &&
|
||||
!reqresp.asyncLoading &&
|
||||
reqresp.payload
|
||||
) {
|
||||
this.removeReqResp(networkId);
|
||||
await this.serializeToWARC(reqresp);
|
||||
}
|
||||
|
||||
// not rewritten, and not streaming, return false to continue
|
||||
if (!rewritten && !streamingConsume) {
|
||||
if (!reqresp.payload) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue