Mirror of https://github.com/webrecorder/browsertrix-crawler.git, synced 2025-10-19 06:23:16 +00:00
set failed URL retry to 5 by default
commit 5961a521c2
parent 5d9c62e264
3 changed files with 5 additions and 4 deletions
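In short: the commit adds a MAX_RETRY_FAILED constant (set to 5) to the shared constants, raises RedisCrawlState.maxRetryPending from its previous hard-coded 1 to that constant, rewords the page-load failure log from "Page Load Failed, skipping page" to "Page Load Failed, will retry", and fixes a "Retring"/"Retrying" typo in the retry debug message. The per-file headers of the diff were lost in extraction; the hunks below touch the page-load error handling, the shared constants module, and the Redis-backed crawl state.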
@@ -1921,7 +1921,7 @@ self.__bx_behaviors.selectMainBehavior();
     } else if (!downloadResponse) {
       // log if not already log and rethrow, consider page failed
       if (msg !== "logged") {
-        logger.error("Page Load Failed, skipping page", {
+        logger.error("Page Load Failed, will retry", {
           msg,
           loadState: data.loadState,
           ...logDetails,
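For orientation, a minimal sketch of the log-once-and-rethrow pattern this hunk adjusts; the failPage name and surrounding control flow are assumptions for illustration, with only the "logged" sentinel check taken from the hunk itself.

// Hypothetical sketch, not the crawler's actual code: log a page-load
// failure once, then rethrow with a sentinel so callers don't log it again.
function failPage(msg: string, url: string, loadState: number): never {
  if (msg !== "logged") {
    // First report of this failure; the URL will be retried, not skipped.
    console.error("Page Load Failed, will retry", { msg, url, loadState });
  }
  // The sentinel message marks the error as already logged.
  throw new Error("logged");
}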
@@ -27,6 +27,7 @@ export const ADD_LINK_FUNC = "__bx_addLink";
 export const FETCH_FUNC = "__bx_fetch";
 
 export const MAX_DEPTH = 1000000;
+export const MAX_RETRY_FAILED = 5;
 
 export const FETCH_HEADERS_TIMEOUT_SECS = 30;
 export const PAGE_OP_TIMEOUT_SECS = 5;
@@ -3,7 +3,7 @@ import { v4 as uuidv4 } from "uuid";
 
 import { logger } from "./logger.js";
 
-import { MAX_DEPTH } from "./constants.js";
+import { MAX_DEPTH, MAX_RETRY_FAILED } from "./constants.js";
 import { ScopedSeed } from "./seeds.js";
 import { Frame } from "puppeteer-core";
 
@@ -170,7 +170,7 @@ export type SaveState = {
 // ============================================================================
 export class RedisCrawlState {
   redis: Redis;
-  maxRetryPending = 1;
+  maxRetryPending = MAX_RETRY_FAILED;
 
   uid: string;
   key: string;
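As a rough illustration of what maxRetryPending = MAX_RETRY_FAILED enables, a minimal sketch of requeueing failed URLs up to a cap; the PageState shape and in-memory queues are assumptions for illustration, not RedisCrawlState's actual Redis-backed lists.

// Hypothetical sketch: retry gating with a cap like MAX_RETRY_FAILED.
const MAX_RETRY_FAILED = 5;

interface PageState {
  url: string;
  retry: number; // retries already consumed for this URL
}

class RetryQueue {
  private pending: PageState[] = [];
  private failed: PageState[] = [];

  // On a failed load: requeue until the cap is hit, then fail permanently.
  markFailed(state: PageState): void {
    if (state.retry < MAX_RETRY_FAILED) {
      state.retry += 1;
      this.pending.push(state); // eligible for another attempt
    } else {
      this.failed.push(state); // give up after MAX_RETRY_FAILED retries
    }
  }
}

Under this reading, a persistently failing URL is retried up to five times before landing in the failed list, matching the commit message's "retry to 5 by default".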
@@ -608,7 +608,7 @@ return inx;
     }
 
     if (retryFailed) {
-      logger.debug("Retring failed URL", { url: data.url }, "state");
+      logger.debug("Retrying failed URL", { url: data.url }, "state");
     }
 
     await this.markStarted(data.url);