mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Set the default retry count for failed URLs to 5
This commit is contained in:
parent
5d9c62e264
commit
5961a521c2
3 changed files with 5 additions and 4 deletions
|
@ -1921,7 +1921,7 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
} else if (!downloadResponse) {
|
} else if (!downloadResponse) {
|
||||||
// log if not already log and rethrow, consider page failed
|
// log if not already log and rethrow, consider page failed
|
||||||
if (msg !== "logged") {
|
if (msg !== "logged") {
|
||||||
logger.error("Page Load Failed, skipping page", {
|
logger.error("Page Load Failed, will retry", {
|
||||||
msg,
|
msg,
|
||||||
loadState: data.loadState,
|
loadState: data.loadState,
|
||||||
...logDetails,
|
...logDetails,
|
||||||
|
|
|
@ -27,6 +27,7 @@ export const ADD_LINK_FUNC = "__bx_addLink";
|
||||||
export const FETCH_FUNC = "__bx_fetch";
|
export const FETCH_FUNC = "__bx_fetch";
|
||||||
|
|
||||||
export const MAX_DEPTH = 1000000;
|
export const MAX_DEPTH = 1000000;
|
||||||
|
export const MAX_RETRY_FAILED = 5;
|
||||||
|
|
||||||
export const FETCH_HEADERS_TIMEOUT_SECS = 30;
|
export const FETCH_HEADERS_TIMEOUT_SECS = 30;
|
||||||
export const PAGE_OP_TIMEOUT_SECS = 5;
|
export const PAGE_OP_TIMEOUT_SECS = 5;
|
||||||
|
|
|
@ -3,7 +3,7 @@ import { v4 as uuidv4 } from "uuid";
|
||||||
|
|
||||||
import { logger } from "./logger.js";
|
import { logger } from "./logger.js";
|
||||||
|
|
||||||
import { MAX_DEPTH } from "./constants.js";
|
import { MAX_DEPTH, MAX_RETRY_FAILED } from "./constants.js";
|
||||||
import { ScopedSeed } from "./seeds.js";
|
import { ScopedSeed } from "./seeds.js";
|
||||||
import { Frame } from "puppeteer-core";
|
import { Frame } from "puppeteer-core";
|
||||||
|
|
||||||
|
@ -170,7 +170,7 @@ export type SaveState = {
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
export class RedisCrawlState {
|
export class RedisCrawlState {
|
||||||
redis: Redis;
|
redis: Redis;
|
||||||
maxRetryPending = 1;
|
maxRetryPending = MAX_RETRY_FAILED;
|
||||||
|
|
||||||
uid: string;
|
uid: string;
|
||||||
key: string;
|
key: string;
|
||||||
|
@ -608,7 +608,7 @@ return inx;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (retryFailed) {
|
if (retryFailed) {
|
||||||
logger.debug("Retring failed URL", { url: data.url }, "state");
|
logger.debug("Retrying failed URL", { url: data.url }, "state");
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.markStarted(data.url);
|
await this.markStarted(data.url);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue