Mirror of https://github.com/webrecorder/browsertrix-crawler.git (synced 2025-10-19 14:33:17 +00:00)
don't log page URLs dupes as errors
parent 3fde3d3885
commit 8d87293932
1 changed file with 5 additions and 10 deletions
@@ -2455,30 +2455,25 @@ self.__bx_behaviors.selectMainBehavior();
       this.pageLimit,
     );

-    const logContext = depth === 0 ? "scope" : "links";
-    const logLevel = depth === 0 ? "error" : "debug";
-
     switch (result) {
       case QueueState.ADDED:
-        logger.debug("Queued new page URL", { url, ...logDetails }, logContext);
+        logger.debug("Queued new page URL", { url, ...logDetails }, "links");
         return true;

       case QueueState.LIMIT_HIT:
-        logger.logAsJSON(
+        logger.debug(
           "Page URL not queued, at page limit",
           { url, ...logDetails },
-          logContext,
-          logLevel,
+          "links",
         );
         this.limitHit = true;
         return false;

       case QueueState.DUPE_URL:
-        logger.logAsJSON(
+        logger.debug(
           "Page URL not queued, already seen",
           { url, ...logDetails },
-          logContext,
-          logLevel,
+          "links",
         );
         return false;
     }
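For context, here is a minimal, self-contained TypeScript sketch of the behavior after this commit. The QueueState cases, the log messages, and the three-argument logger.debug(message, details, context) call are taken from the diff above; the enum and the console-backed logger are hypothetical stand-ins for the real implementations elsewhere in browsertrix-crawler. The effect of the change is that a duplicate URL discovered at depth 0 is no longer reported through logAsJSON at "error" level in the "scope" context, but at debug level in the "links" context, like any other non-queued link.

// Hypothetical stand-ins for illustration; the real QueueState and logger
// live elsewhere in browsertrix-crawler's codebase.
enum QueueState {
  ADDED,
  LIMIT_HIT,
  DUPE_URL,
}

const logger = {
  debug(message: string, details: object, context: string): void {
    console.log(JSON.stringify({ logLevel: "debug", context, message, details }));
  },
};

// Mirrors the post-commit switch: every outcome, including duplicates
// discovered at depth 0, is logged at debug level in the "links" context.
function handleQueueResult(result: QueueState, url: string): boolean {
  const logDetails = {};
  switch (result) {
    case QueueState.ADDED:
      logger.debug("Queued new page URL", { url, ...logDetails }, "links");
      return true;
    case QueueState.LIMIT_HIT:
      logger.debug(
        "Page URL not queued, at page limit",
        { url, ...logDetails },
        "links",
      );
      return false;
    case QueueState.DUPE_URL:
      logger.debug(
        "Page URL not queued, already seen",
        { url, ...logDetails },
        "links",
      );
      return false;
  }
  return false;
}

// Example: a previously seen URL now produces a debug-level "links" entry.
handleQueueResult(QueueState.DUPE_URL, "https://example.com/");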