mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
url queueing: log skipped URLs as errors if depth === 0 (#868)
- will ensure seeds from URL list are reported as errors if skipped - also set logging context to 'scope' instead of 'links' - fixes #866 --------- Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
96fd22971f
commit
1a4341bfbc
3 changed files with 18 additions and 10 deletions
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "1.7.0-beta.0",
|
||||
"version": "1.7.0-beta.1",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
|
@ -2459,25 +2459,30 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
this.pageLimit,
|
||||
);
|
||||
|
||||
const logContext = depth === 0 ? "scope" : "links";
|
||||
const logLevel = depth === 0 ? "error" : "debug";
|
||||
|
||||
switch (result) {
|
||||
case QueueState.ADDED:
|
||||
logger.debug("Queued new page url", { url, ...logDetails }, "links");
|
||||
logger.debug("Queued new page url", { url, ...logDetails }, logContext);
|
||||
return true;
|
||||
|
||||
case QueueState.LIMIT_HIT:
|
||||
logger.debug(
|
||||
"Not queued page url, at page limit",
|
||||
logger.logAsJSON(
|
||||
"Page url not queued, at page limit",
|
||||
{ url, ...logDetails },
|
||||
"links",
|
||||
logContext,
|
||||
logLevel,
|
||||
);
|
||||
this.limitHit = true;
|
||||
return false;
|
||||
|
||||
case QueueState.DUPE_URL:
|
||||
logger.debug(
|
||||
"Not queued page url, already seen",
|
||||
logger.logAsJSON(
|
||||
"Page url not queued, already seen",
|
||||
{ url, ...logDetails },
|
||||
"links",
|
||||
logContext,
|
||||
logLevel,
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -56,10 +56,13 @@ export const LOG_CONTEXT_TYPES = [
|
|||
"wacz",
|
||||
"replay",
|
||||
"proxy",
|
||||
"scope",
|
||||
] as const;
|
||||
|
||||
export type LogContext = (typeof LOG_CONTEXT_TYPES)[number];
|
||||
|
||||
export type LogLevel = "debug" | "info" | "warn" | "error" | "fatal";
|
||||
|
||||
export const DEFAULT_EXCLUDE_LOG_CONTEXTS: LogContext[] = [
|
||||
"recorderNetwork",
|
||||
"jsError",
|
||||
|
@ -118,7 +121,7 @@ class Logger {
|
|||
message: string,
|
||||
dataUnknown: unknown,
|
||||
context: LogContext,
|
||||
logLevel = "info",
|
||||
logLevel: LogLevel,
|
||||
) {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const data: Record<string, any> = formatErr(dataUnknown);
|
||||
|
@ -182,7 +185,7 @@ class Logger {
|
|||
}
|
||||
|
||||
info(message: string, data: unknown = {}, context: LogContext = "general") {
|
||||
this.logAsJSON(message, data, context);
|
||||
this.logAsJSON(message, data, context, "info");
|
||||
}
|
||||
|
||||
error(message: string, data: unknown = {}, context: LogContext = "general") {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue