Set the retry limit for failed URLs to 5 by default

This commit is contained in:
Ilya Kreymer 2025-01-17 18:19:31 -08:00
parent 5d9c62e264
commit 5961a521c2
3 changed files with 5 additions and 4 deletions

View file

@ -1921,7 +1921,7 @@ self.__bx_behaviors.selectMainBehavior();
} else if (!downloadResponse) { } else if (!downloadResponse) {
// log if not already logged, then rethrow; consider page failed // log if not already logged, then rethrow; consider page failed
if (msg !== "logged") { if (msg !== "logged") {
logger.error("Page Load Failed, skipping page", { logger.error("Page Load Failed, will retry", {
msg, msg,
loadState: data.loadState, loadState: data.loadState,
...logDetails, ...logDetails,

View file

@ -27,6 +27,7 @@ export const ADD_LINK_FUNC = "__bx_addLink";
export const FETCH_FUNC = "__bx_fetch"; export const FETCH_FUNC = "__bx_fetch";
export const MAX_DEPTH = 1000000; export const MAX_DEPTH = 1000000;
export const MAX_RETRY_FAILED = 5;
export const FETCH_HEADERS_TIMEOUT_SECS = 30; export const FETCH_HEADERS_TIMEOUT_SECS = 30;
export const PAGE_OP_TIMEOUT_SECS = 5; export const PAGE_OP_TIMEOUT_SECS = 5;

View file

@ -3,7 +3,7 @@ import { v4 as uuidv4 } from "uuid";
import { logger } from "./logger.js"; import { logger } from "./logger.js";
import { MAX_DEPTH } from "./constants.js"; import { MAX_DEPTH, MAX_RETRY_FAILED } from "./constants.js";
import { ScopedSeed } from "./seeds.js"; import { ScopedSeed } from "./seeds.js";
import { Frame } from "puppeteer-core"; import { Frame } from "puppeteer-core";
@ -170,7 +170,7 @@ export type SaveState = {
// ============================================================================ // ============================================================================
export class RedisCrawlState { export class RedisCrawlState {
redis: Redis; redis: Redis;
maxRetryPending = 1; maxRetryPending = MAX_RETRY_FAILED;
uid: string; uid: string;
key: string; key: string;
@ -608,7 +608,7 @@ return inx;
} }
if (retryFailed) { if (retryFailed) {
logger.debug("Retring failed URL", { url: data.url }, "state"); logger.debug("Retrying failed URL", { url: data.url }, "state");
} }
await this.markStarted(data.url); await this.markStarted(data.url);