set failed URL retry to 5 by default

This commit is contained in:
Ilya Kreymer 2025-01-17 18:19:31 -08:00
parent 5d9c62e264
commit 5961a521c2
3 changed files with 5 additions and 4 deletions

View file

@@ -1921,7 +1921,7 @@ self.__bx_behaviors.selectMainBehavior();
} else if (!downloadResponse) {
// log if not already log and rethrow, consider page failed
if (msg !== "logged") {
logger.error("Page Load Failed, skipping page", {
logger.error("Page Load Failed, will retry", {
msg,
loadState: data.loadState,
...logDetails,

View file

@@ -27,6 +27,7 @@ export const ADD_LINK_FUNC = "__bx_addLink";
export const FETCH_FUNC = "__bx_fetch";
export const MAX_DEPTH = 1000000;
export const MAX_RETRY_FAILED = 5;
export const FETCH_HEADERS_TIMEOUT_SECS = 30;
export const PAGE_OP_TIMEOUT_SECS = 5;

View file

@@ -3,7 +3,7 @@ import { v4 as uuidv4 } from "uuid";
import { logger } from "./logger.js";
import { MAX_DEPTH } from "./constants.js";
import { MAX_DEPTH, MAX_RETRY_FAILED } from "./constants.js";
import { ScopedSeed } from "./seeds.js";
import { Frame } from "puppeteer-core";
@@ -170,7 +170,7 @@ export type SaveState = {
// ============================================================================
export class RedisCrawlState {
redis: Redis;
maxRetryPending = 1;
maxRetryPending = MAX_RETRY_FAILED;
uid: string;
key: string;
@@ -608,7 +608,7 @@ return inx;
}
if (retryFailed) {
logger.debug("Retring failed URL", { url: data.url }, "state");
logger.debug("Retrying failed URL", { url: data.url }, "state");
}
await this.markStarted(data.url);