diff --git a/src/crawler.ts b/src/crawler.ts index 1033df86..12f0fe03 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -878,7 +878,7 @@ self.__bx_behaviors.selectMainBehavior(); try { const { fetched, mime, ts } = await timedRun( directFetchCapture({ url, headers: this.headers, cdp }), - FETCH_TIMEOUT_SECS, + this.params.pageLoadTimeout, "Direct fetch capture attempt timed out", logDetails, "fetch", diff --git a/src/util/recorder.ts b/src/util/recorder.ts index f9481f2a..e391894f 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -670,8 +670,10 @@ export class Recorder { // if contentLength is large or unknown, do streaming, unless its an essential resource // in which case, need to do a full fetch either way + // don't count non-200 responses which may not have content-length if ( (contentLen < 0 || contentLen > MAX_BROWSER_DEFAULT_FETCH_SIZE) && + responseStatusCode === 200 && !this.isEssentialResource(reqresp.resourceType, mimeType) ) { const opts: ResponseStreamAsyncFetchOptions = { @@ -1030,7 +1032,7 @@ export class Recorder { } isEssentialResource(resourceType: string | undefined, contentType: string) { - if (["document", "stylesheet", "script"].includes(resourceType || "")) { + if (resourceType === "script" || resourceType === "stylesheet") { return true; }