more cleanup

Ilya Kreymer 2025-02-09 13:39:40 -08:00
parent f14f53f5c2
commit 2b219cfd22
9 changed files with 31 additions and 14 deletions

View file

@@ -608,7 +608,7 @@ export class Crawler {
       }
     } catch (e) {
       logger.error("Crawl failed", e);
-      exitCode = ExitCodes.Failed;
+      exitCode = ExitCodes.CrawlFailed;
       status = "failing";
       if (await this.crawlState.incFailCount()) {
         status = "failed";
@@ -1867,7 +1867,7 @@ self.__bx_behaviors.selectMainBehavior();
         "Unable to write WACZ successfully",
         {},
         "wacz",
-        ExitCodes.Fatal,
+        ExitCodes.UploadError,
       );
     }
   }
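
The hunk above separates a crawl that failed outright (CrawlFailed) from one that is merely "failing" and may restart. A minimal sketch of what a Redis-backed incFailCount could look like, assuming an ioredis client and an assumed retry limit (not the crawler's actual crawlState implementation):

import { Redis } from "ioredis";

// Hedged sketch, not the real crawlState API: bump a per-crawl failure
// counter in Redis and report whether the crawl has now failed more times
// than a restart policy should tolerate ("failed" vs. "failing" above).
async function incFailCount(
  redis: Redis,
  crawlId: string,
  maxFails = 3, // assumed limit, not from the diff
): Promise<boolean> {
  const count = await redis.incr(`${crawlId}:failcount`);
  return count >= maxFails;
}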

View file

@@ -147,7 +147,7 @@ function getDefaultWindowSize() {

 function handleTerminate(signame: string) {
   logger.info(`Got signal ${signame}, exiting`);
-  process.exit(ExitCodes.GenericError);
+  process.exit(ExitCodes.InterruptedGraceful);
 }

 async function main() {
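
For context, a handler like this is typically registered with Node's standard signal API; the hunk does not show how handleTerminate is wired up in this file, so treat the following as an assumption:

for (const signame of ["SIGINT", "SIGTERM"] as const) {
  process.on(signame, () => handleTerminate(signame));
}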

View file

@@ -16,7 +16,7 @@ async function handleTerminate(signame: string) {
   logger.info(`${signame} received...`);
   if (!crawler || !crawler.crawlState) {
     logger.error("error: no crawler running, exiting");
-    process.exit(ExitCodes.GenericError);
+    process.exit(ExitCodes.CrawlFailed);
   }

   if (crawler.done) {
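
The enum in constants.ts below distinguishes InterruptedGraceful from InterruptedImmediate. A hedged sketch of the two-stage shutdown such a distinction typically supports; this is illustrative, not main.ts's exact logic, and finishPendingWork() is a hypothetical stand-in:

import { ExitCodes } from "./constants.js";

// Hypothetical stand-in for whatever state-flushing the crawler does:
async function finishPendingWork(): Promise<void> {}

let stopRequested = false;

// Sketch: a first signal requests a graceful stop; a repeated signal
// forces an immediate exit with a distinct code.
async function onSignal() {
  if (stopRequested) {
    process.exit(ExitCodes.InterruptedImmediate);
  }
  stopRequested = true;
  await finishPendingWork();
  process.exit(ExitCodes.InterruptedGraceful);
}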

View file

@@ -835,7 +835,7 @@ class ArgParser {
       "Invalid seed specified, aborting crawl",
       { url: newSeed.url },
       "general",
-      1,
+      ExitCodes.FailCrawl,
     );
   }
 }

View file

@@ -6,6 +6,7 @@ import { Browser } from "./browser.js";
 import { fetch } from "undici";
 import { getProxyDispatcher } from "./proxy.js";
+import { ExitCodes } from "./constants.js";

 const RULE_TYPES = ["block", "allowOnly"];
@@ -47,7 +48,12 @@ class BlockRule {
     }

     if (!RULE_TYPES.includes(this.type)) {
-      logger.fatal('Rule "type" must be: ' + RULE_TYPES.join(", "));
+      logger.fatal(
+        'Rule "type" must be: ' + RULE_TYPES.join(", "),
+        {},
+        "blocking",
+        ExitCodes.FailCrawl,
+      );
     }
   }

View file

@@ -59,13 +59,15 @@ export const DISPLAY = ":99";

 export enum ExitCodes {
   Success = 0,
-  GenericError = 1,
-  Failed = 9,
+  //GenericError = 1,
+  OutOfSpace = 3,
+  CrawlFailed = 9,
   BrowserCrashed = 10,
   InterruptedGraceful = 11,
   InterruptedImmediate = 13,
-  Fatal = 17,
+  FatalError = 17,
   RedisGone = 18,
+  UploadError = 19,
   ProxyError = 21,
   // used to indicate crawl should be failed, not just restarted
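
The trailing comment (introducing the FailCrawl sentinel used throughout this commit) names the distinction these codes exist for: whether a supervisor should restart the container or fail the crawl outright. A hedged sketch of how a supervising process might branch on them; this logic is not part of the crawler itself and may differ in practice:

import { ExitCodes } from "./constants.js";

// Hedged sketch, assuming a supervisor that inspects the container's
// exit code; real orchestration logic lives elsewhere.
function shouldRestartCrawl(code: number): boolean {
  switch (code) {
    case ExitCodes.Success:
    case ExitCodes.CrawlFailed: // the crawl itself failed: do not retry
      return false;
    case ExitCodes.BrowserCrashed:
    case ExitCodes.RedisGone: // likely transient: a restart may help
      return true;
    default:
      return false;
  }
}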

View file

@@ -74,7 +74,7 @@ class Logger {
   contexts: LogContext[] = [];
   excludeContexts: LogContext[] = [];
   crawlState?: RedisCrawlState | null = null;
-  fatalExitCode: ExitCodes = ExitCodes.Fatal;
+  fatalExitCode: ExitCodes = ExitCodes.FatalError;

   setDefaultFatalExitCode(exitCode: ExitCodes) {
     this.fatalExitCode = exitCode;
@@ -184,9 +184,9 @@ class Logger {
     message: string,
     data = {},
     context: LogContext = "general",
-    exitCode?: ExitCodes | undefined,
+    exitCode: ExitCodes,
   ) {
-    if (!exitCode) {
+    if (exitCode === ExitCodes.FailCrawl) {
       exitCode = this.fatalExitCode;
     }
     this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
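
With the signature change above, exitCode is now required, and ExitCodes.FailCrawl acts as a sentinel meaning "use the default configured via setDefaultFatalExitCode()", replacing the old optional-parameter convention. Illustrative call sites, mirroring calls made elsewhere in this commit:

import { logger } from "./logger.js";
import { ExitCodes } from "./constants.js";

logger.setDefaultFatalExitCode(ExitCodes.CrawlFailed);

// FailCrawl is the sentinel: fatal() swaps in the configured default
logger.fatal("Invalid seed specified, aborting crawl", {}, "general", ExitCodes.FailCrawl);

// any other code is used as-is
logger.fatal("Unable to write WACZ successfully", {}, "wacz", ExitCodes.UploadError);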

View file

@@ -1,5 +1,6 @@
 import { Redis } from "ioredis";
 import { logger } from "./logger.js";
+import { ExitCodes } from "./constants.js";

 const error = console.error;
@@ -18,7 +19,12 @@ console.error = function (...args) {
   if (now - lastLogTime > REDIS_ERROR_LOG_INTERVAL_SECS) {
     if (lastLogTime && exitOnError) {
-      logger.fatal("Crawl interrupted, redis gone, exiting", {}, "redis");
+      logger.fatal(
+        "Crawl interrupted, redis gone, exiting",
+        {},
+        "redis",
+        ExitCodes.RedisGone,
+      );
     }
     logger.warn("ioredis error", { error: args[0] }, "redis");
     lastLogTime = now;
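
For context, these lines are reached because this file monkey-patches console.error, since ioredis reports connection errors there. A condensed sketch of the surrounding pattern; the interval value, the exitOnError initialization, and the final pass-through are assumptions, not verbatim source:

import { logger } from "./logger.js";
import { ExitCodes } from "./constants.js";

const REDIS_ERROR_LOG_INTERVAL_SECS = 10; // assumed value
const error = console.error;
let lastLogTime = 0;
let exitOnError = false; // assumed: enabled once a crawl is underway

console.error = function (...args: unknown[]) {
  const now = Date.now() / 1000;
  // rate-limit redis error logging; a repeat while exitOnError is set
  // treats redis as gone and exits with RedisGone (see hunk above)
  if (now - lastLogTime > REDIS_ERROR_LOG_INTERVAL_SECS) {
    if (lastLogTime && exitOnError) {
      logger.fatal("Crawl interrupted, redis gone, exiting", {}, "redis", ExitCodes.RedisGone);
    }
    logger.warn("ioredis error", { error: args[0] }, "redis");
    lastLogTime = now;
  }
  error(...args); // assumed: errors still reach the original console.error
};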

View file

@@ -1,5 +1,5 @@
 import { logger } from "./logger.js";
-import { MAX_DEPTH } from "./constants.js";
+import { ExitCodes, MAX_DEPTH } from "./constants.js";

 type ScopeType =
   | "prefix"
@@ -219,6 +219,9 @@ export class ScopedSeed {
     default:
       logger.fatal(
         `Invalid scope type "${scopeType}" specified, valid types are: page, page-spa, prefix, host, domain, any`,
+        {},
+        "general",
+        ExitCodes.FailCrawl,
       );
   }