diff --git a/src/crawler.ts b/src/crawler.ts index c437de9c..069109ab 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -129,6 +129,8 @@ export class Crawler { limitHit = false; pageLimit: number; + dupeSeedsFound = false; + saveStateFiles: string[] = []; lastSaveTime: number; @@ -2466,10 +2468,19 @@ self.__bx_behaviors.selectMainBehavior(); { url, ...logDetails }, "links", ); + if (!this.limitHit && depth === 0) { + logger.error( + "Page limit reached when adding URL list, some URLs not crawled.", + ); + } this.limitHit = true; return false; case QueueState.DUPE_URL: + if (!this.dupeSeedsFound && depth === 0) { + logger.error("Duplicate seed URLs found and skipped"); + this.dupeSeedsFound = true; + } logger.debug( "Page URL not queued, already seen", { url, ...logDetails },