url queueing: log skipped URLs as errors if depth === 0 (#868)

- ensures seeds from the URL list are reported as errors if skipped
- also sets the logging context to 'scope' instead of 'links' for these depth 0 URLs
- fixes #866

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-07-23 10:05:40 -07:00 committed by GitHub
parent 96fd22971f
commit 1a4341bfbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 18 additions and 10 deletions

View file

@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "1.7.0-beta.0",
"version": "1.7.0-beta.1",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",

View file

@ -2459,25 +2459,30 @@ self.__bx_behaviors.selectMainBehavior();
this.pageLimit,
);
const logContext = depth === 0 ? "scope" : "links";
const logLevel = depth === 0 ? "error" : "debug";
switch (result) {
case QueueState.ADDED:
logger.debug("Queued new page url", { url, ...logDetails }, "links");
logger.debug("Queued new page url", { url, ...logDetails }, logContext);
return true;
case QueueState.LIMIT_HIT:
logger.debug(
"Not queued page url, at page limit",
logger.logAsJSON(
"Page url not queued, at page limit",
{ url, ...logDetails },
"links",
logContext,
logLevel,
);
this.limitHit = true;
return false;
case QueueState.DUPE_URL:
logger.debug(
"Not queued page url, already seen",
logger.logAsJSON(
"Page url not queued, already seen",
{ url, ...logDetails },
"links",
logContext,
logLevel,
);
return false;
}

View file

@ -56,10 +56,13 @@ export const LOG_CONTEXT_TYPES = [
"wacz",
"replay",
"proxy",
"scope",
] as const;
/** Union of the string literals listed in `LOG_CONTEXT_TYPES`. */
export type LogContext = (typeof LOG_CONTEXT_TYPES)[number];
/** Level names accepted as the `logLevel` argument of `Logger.logAsJSON`. */
export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
export const DEFAULT_EXCLUDE_LOG_CONTEXTS: LogContext[] = [
"recorderNetwork",
"jsError",
@ -118,7 +121,7 @@ class Logger {
message: string,
dataUnknown: unknown,
context: LogContext,
logLevel = "info",
logLevel: LogLevel,
) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const data: Record<string, any> = formatErr(dataUnknown);
@ -182,7 +185,7 @@ class Logger {
}
info(message: string, data: unknown = {}, context: LogContext = "general") {
this.logAsJSON(message, data, context);
this.logAsJSON(message, data, context, "info");
}
error(message: string, data: unknown = {}, context: LogContext = "general") {