mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Remove early serialization, which may result in missing WARC-Protocol and security metadata (#844)
- Drop the early serialization in handleFetchResponse(): it can result in writing the WARC record too early, before the WARC-Protocol and other data are available. (It was added previously for requests loaded via the browser context / service worker, which did not get a 'loadingFinished' message, but these will now still be closed in awaitPageResources().)
- Don't log the 'skipping URL from unknown frame' warning, as it is often spurious: the frame can be added in a subsequent message, and the response is *not* skipped.
This commit is contained in:
parent
7bf10f7f18
commit
178b10a37f
2 changed files with 0 additions and 18 deletions
|
@ -577,12 +577,6 @@ export class Browser {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!foundRecorder) {
|
if (!foundRecorder) {
|
||||||
logger.warn(
|
|
||||||
"Skipping URL from unknown frame",
|
|
||||||
{ url, frameId },
|
|
||||||
"recorder",
|
|
||||||
);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this.firstCDP.send("Fetch.continueResponse", { requestId });
|
await this.firstCDP.send("Fetch.continueResponse", { requestId });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|
|
@ -804,18 +804,6 @@ export class Recorder extends EventEmitter {
|
||||||
|
|
||||||
const rewritten = await this.rewriteResponse(reqresp, mimeType);
|
const rewritten = await this.rewriteResponse(reqresp, mimeType);
|
||||||
|
|
||||||
// if in browser context, and not also intercepted in page context
|
|
||||||
// serialize here, as won't be getting a loadingFinished message for it
|
|
||||||
if (
|
|
||||||
isBrowserContext &&
|
|
||||||
!reqresp.inPageContext &&
|
|
||||||
!reqresp.asyncLoading &&
|
|
||||||
reqresp.payload
|
|
||||||
) {
|
|
||||||
this.removeReqResp(networkId);
|
|
||||||
await this.serializeToWARC(reqresp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// not rewritten, and not streaming, return false to continue
|
// not rewritten, and not streaming, return false to continue
|
||||||
if (!rewritten && !streamingConsume) {
|
if (!rewritten && !streamingConsume) {
|
||||||
if (!reqresp.payload) {
|
if (!reqresp.payload) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue