mirror of https://github.com/webrecorder/browsertrix-crawler.git (synced 2025-10-19 06:23:16 +00:00)
fix for direct fetch timeouts (#677)
- use '--timeout' value for direct fetch timeout, instead of fixed 30 seconds
- don't consider 'document' as essential resource regardless of mime type, as any top-level URL is a document
- don't count non-200 responses as non-essential even if missing content-type

fixes #676
parent 85a07aff18
commit 0d6a0b0efa
2 changed files with 4 additions and 2 deletions
@@ -878,7 +878,7 @@ self.__bx_behaviors.selectMainBehavior();
     try {
       const { fetched, mime, ts } = await timedRun(
         directFetchCapture({ url, headers: this.headers, cdp }),
-        FETCH_TIMEOUT_SECS,
+        this.params.pageLoadTimeout,
         "Direct fetch capture attempt timed out",
         logDetails,
         "fetch",
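
For context, a timed-run wrapper of this shape races a promise against a timeout given in seconds. The sketch below is a simplified assumption, not the actual timedRun helper from browsertrix-crawler; it only illustrates why passing this.params.pageLoadTimeout (the '--timeout' value) instead of the fixed FETCH_TIMEOUT_SECS constant lets the direct fetch run as long as the configured page load timeout.

// Simplified, hypothetical sketch of a timed-run helper (not the real
// timedRun from browsertrix-crawler): race a promise against a timeout
// given in seconds, logging a message if the timeout wins.
async function timedRunSketch<T>(
  promise: Promise<T>,
  timeoutSecs: number,
  message: string,
): Promise<T | undefined> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timedOut = new Promise<undefined>((resolve) => {
    timer = setTimeout(() => resolve(undefined), timeoutSecs * 1000);
  });
  try {
    const result = await Promise.race([promise, timedOut]);
    if (result === undefined) {
      console.warn(message, { timeoutSecs });
    }
    return result;
  } finally {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  }
}

// With this commit, the seconds value comes from the configurable
// pageLoadTimeout ('--timeout') rather than a fixed 30-second constant:
//   await timedRunSketch(
//     directFetchCapture({ url, headers, cdp }),
//     params.pageLoadTimeout,
//     "Direct fetch capture attempt timed out",
//   );
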
@@ -670,8 +670,10 @@ export class Recorder {

     // if contentLength is large or unknown, do streaming, unless its an essential resource
     // in which case, need to do a full fetch either way
+    // don't count non-200 responses which may not have content-length
     if (
       (contentLen < 0 || contentLen > MAX_BROWSER_DEFAULT_FETCH_SIZE) &&
+      responseStatusCode === 200 &&
       !this.isEssentialResource(reqresp.resourceType, mimeType)
     ) {
       const opts: ResponseStreamAsyncFetchOptions = {
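
Read on its own, the new condition amounts to a three-part predicate. The sketch below restates it with an illustrative constant value and a made-up helper name: only large or unknown-length responses that actually returned 200 and are not essential resources go down the streaming fetch path.

// Illustrative threshold; the real MAX_BROWSER_DEFAULT_FETCH_SIZE constant
// is defined elsewhere in browsertrix-crawler and may have a different value.
const MAX_BROWSER_DEFAULT_FETCH_SIZE = 5_000_000;

// Sketch of the condition after this change, as a standalone predicate
// (the function name is made up for illustration): stream only responses
// that are large or of unknown length, actually returned 200, and are
// not essential. Non-200 responses often have no content-length, so a
// contentLen of -1 alone no longer routes them to the streaming path.
function shouldStreamResponse(
  contentLen: number,
  responseStatusCode: number,
  isEssential: boolean,
): boolean {
  return (
    (contentLen < 0 || contentLen > MAX_BROWSER_DEFAULT_FETCH_SIZE) &&
    responseStatusCode === 200 &&
    !isEssential
  );
}

// Example: a 404 with unknown length is no longer streamed.
// shouldStreamResponse(-1, 404, false) === false
// shouldStreamResponse(-1, 200, false) === true
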
@@ -1030,7 +1032,7 @@ export class Recorder {
   }

   isEssentialResource(resourceType: string | undefined, contentType: string) {
-    if (["document", "stylesheet", "script"].includes(resourceType || "")) {
+    if (resourceType === "script" || resourceType === "stylesheet") {
       return true;
     }
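
The hunk cuts off before the rest of isEssentialResource, but the intent of the change can be sketched as follows; the trailing content-type check is an assumption standing in for the real method's remaining logic. The key point is that 'document' no longer returns true unconditionally, since any top-level URL is a document regardless of its mime type.

// Sketch of the revised logic, not the actual method body.
function isEssentialResourceSketch(
  resourceType: string | undefined,
  contentType: string,
): boolean {
  // scripts and stylesheets still always require a full fetch
  if (resourceType === "script" || resourceType === "stylesheet") {
    return true;
  }

  // "document" is deliberately no longer an automatic match here: every
  // top-level URL is a document, so the decision falls through to the
  // content type instead. The check below is a placeholder for whatever
  // the real method does with contentType after the lines shown above.
  return contentType === "application/json";
}
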