mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
more cleanup
This commit is contained in:
parent
f14f53f5c2
commit
2b219cfd22
9 changed files with 31 additions and 14 deletions
|
@ -608,7 +608,7 @@ export class Crawler {
|
|||
}
|
||||
} catch (e) {
|
||||
logger.error("Crawl failed", e);
|
||||
exitCode = ExitCodes.Failed;
|
||||
exitCode = ExitCodes.CrawlFailed;
|
||||
status = "failing";
|
||||
if (await this.crawlState.incFailCount()) {
|
||||
status = "failed";
|
||||
|
@ -1867,7 +1867,7 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
"Unable to write WACZ successfully",
|
||||
{},
|
||||
"wacz",
|
||||
ExitCodes.Fatal,
|
||||
ExitCodes.UploadError,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -147,7 +147,7 @@ function getDefaultWindowSize() {
|
|||
|
||||
function handleTerminate(signame: string) {
|
||||
logger.info(`Got signal ${signame}, exiting`);
|
||||
process.exit(ExitCodes.GenericError);
|
||||
process.exit(ExitCodes.InterruptedGraceful);
|
||||
}
|
||||
|
||||
async function main() {
|
||||
|
|
|
@ -16,7 +16,7 @@ async function handleTerminate(signame: string) {
|
|||
logger.info(`${signame} received...`);
|
||||
if (!crawler || !crawler.crawlState) {
|
||||
logger.error("error: no crawler running, exiting");
|
||||
process.exit(ExitCodes.GenericError);
|
||||
process.exit(ExitCodes.CrawlFailed);
|
||||
}
|
||||
|
||||
if (crawler.done) {
|
||||
|
|
|
@ -835,7 +835,7 @@ class ArgParser {
|
|||
"Invalid seed specified, aborting crawl",
|
||||
{ url: newSeed.url },
|
||||
"general",
|
||||
1,
|
||||
ExitCodes.FailCrawl,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ import { Browser } from "./browser.js";
|
|||
|
||||
import { fetch } from "undici";
|
||||
import { getProxyDispatcher } from "./proxy.js";
|
||||
import { ExitCodes } from "./constants.js";
|
||||
|
||||
const RULE_TYPES = ["block", "allowOnly"];
|
||||
|
||||
|
@ -47,7 +48,12 @@ class BlockRule {
|
|||
}
|
||||
|
||||
if (!RULE_TYPES.includes(this.type)) {
|
||||
logger.fatal('Rule "type" must be: ' + RULE_TYPES.join(", "));
|
||||
logger.fatal(
|
||||
'Rule "type" must be: ' + RULE_TYPES.join(", "),
|
||||
{},
|
||||
"blocking",
|
||||
ExitCodes.FailCrawl,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -59,13 +59,15 @@ export const DISPLAY = ":99";
|
|||
|
||||
export enum ExitCodes {
|
||||
Success = 0,
|
||||
GenericError = 1,
|
||||
Failed = 9,
|
||||
//GenericError = 1,
|
||||
OutOfSpace = 3,
|
||||
CrawlFailed = 9,
|
||||
BrowserCrashed = 10,
|
||||
InterruptedGraceful = 11,
|
||||
InterruptedImmediate = 13,
|
||||
Fatal = 17,
|
||||
FatalError = 17,
|
||||
RedisGone = 18,
|
||||
UploadError = 19,
|
||||
ProxyError = 21,
|
||||
|
||||
// used to indicate crawl should be failed, not just restarted
|
||||
|
|
|
@ -74,7 +74,7 @@ class Logger {
|
|||
contexts: LogContext[] = [];
|
||||
excludeContexts: LogContext[] = [];
|
||||
crawlState?: RedisCrawlState | null = null;
|
||||
fatalExitCode: ExitCodes = ExitCodes.Fatal;
|
||||
fatalExitCode: ExitCodes = ExitCodes.FatalError;
|
||||
|
||||
setDefaultFatalExitCode(exitCode: ExitCodes) {
|
||||
this.fatalExitCode = exitCode;
|
||||
|
@ -184,9 +184,9 @@ class Logger {
|
|||
message: string,
|
||||
data = {},
|
||||
context: LogContext = "general",
|
||||
exitCode?: ExitCodes | undefined,
|
||||
exitCode: ExitCodes,
|
||||
) {
|
||||
if (!exitCode) {
|
||||
if (exitCode === ExitCodes.FailCrawl) {
|
||||
exitCode = this.fatalExitCode;
|
||||
}
|
||||
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import { Redis } from "ioredis";
|
||||
import { logger } from "./logger.js";
|
||||
import { ExitCodes } from "./constants.js";
|
||||
|
||||
const error = console.error;
|
||||
|
||||
|
@ -18,7 +19,12 @@ console.error = function (...args) {
|
|||
|
||||
if (now - lastLogTime > REDIS_ERROR_LOG_INTERVAL_SECS) {
|
||||
if (lastLogTime && exitOnError) {
|
||||
logger.fatal("Crawl interrupted, redis gone, exiting", {}, "redis");
|
||||
logger.fatal(
|
||||
"Crawl interrupted, redis gone, exiting",
|
||||
{},
|
||||
"redis",
|
||||
ExitCodes.RedisGone,
|
||||
);
|
||||
}
|
||||
logger.warn("ioredis error", { error: args[0] }, "redis");
|
||||
lastLogTime = now;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
import { logger } from "./logger.js";
|
||||
import { MAX_DEPTH } from "./constants.js";
|
||||
import { ExitCodes, MAX_DEPTH } from "./constants.js";
|
||||
|
||||
type ScopeType =
|
||||
| "prefix"
|
||||
|
@ -219,6 +219,9 @@ export class ScopedSeed {
|
|||
default:
|
||||
logger.fatal(
|
||||
`Invalid scope type "${scopeType}" specified, valid types are: page, page-spa, prefix, host, domain, any`,
|
||||
{},
|
||||
"general",
|
||||
ExitCodes.FailCrawl,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue