browsertrix-crawler/main.js
Ilya Kreymer 1a273abc20
remove tracking execution time here (handled in browsertrix cloud app instead) (#406)
- don't set start / end time in redis
- rename setEndTimeAndExit to setStatusAndExit

add 'fast cancel' option:
- add isCrawlCanceled() to state, which checks redis canceled key
- on interrupt, if canceled, immediately exit with status 0
- on fatal, exit with code 0 if restartsOnError is set
- no longer keeping track of start/end time in crawler itself
2023-10-09 12:28:58 -07:00

57 lines
1.3 KiB
JavaScript
Executable file

#!/usr/bin/env -S node --experimental-global-webcrypto
import { logger } from "./util/logger.js";
import { setExitOnRedisError } from "./util/redis.js";
import { Crawler } from "./crawler.js";
// Shared mutable state used by the signal handlers below.
// All three are reassigned at runtime, so declare them uniformly with `let`
// (the original mixed `var` and `let`).
let crawler = null; // set once the Crawler is constructed at the bottom of this file
let lastSigInt = 0; // timestamp (ms) of the last termination signal handled
let forceTerm = false; // armed by SIGABRT: next SIGTERM/SIGINT exits immediately
/**
 * Shared shutdown path for SIGINT/SIGTERM.
 *
 * First signal: ask the crawler to gracefully finish its current pages.
 * A subsequent signal (or an armed forceTerm) stops the crawl immediately
 * by serializing state and exiting.
 *
 * @param {string} signame - name of the received signal, used for logging
 */
async function handleTerminate(signame) {
  logger.info(`${signame} received...`);

  // Nothing to shut down cleanly without a live crawler and its state.
  if (!(crawler && crawler.crawlState)) {
    logger.error("error: no crawler running, exiting");
    process.exit(1);
  }

  if (crawler.done) {
    logger.info("success: crawler done, exiting");
    process.exit(0);
  }

  // From this point on, a redis failure should terminate the process.
  setExitOnRedisError(true);

  try {
    crawler.checkCanceled();

    if (crawler.interrupted) {
      // Already interrupted once: stop now if fast termination was armed
      // (SIGABRT) or the previous signal was more than 200ms ago.
      const stopNow = forceTerm || Date.now() - lastSigInt > 200;
      if (stopNow) {
        logger.info("SIGNAL: stopping crawl now...");
        await crawler.serializeAndExit();
      }
    } else {
      logger.info("SIGNAL: gracefully finishing current pages...");
      crawler.gracefulFinishOnInterrupt();
    }
    lastSigInt = Date.now();
  } catch (e) {
    logger.error("Error stopping crawl after receiving termination signal", e);
  }
}
// Route termination signals through the shared shutdown handler.
process.on("SIGINT", () => handleTerminate("SIGINT"));
process.on("SIGTERM", () => handleTerminate("SIGTERM"));

// SIGABRT arms fast termination: the next SIGTERM/SIGINT skips the
// graceful-finish path and exits immediately (see handleTerminate).
// Fix: the handler contained no `await`, so it should not be `async`
// (an async listener here only created a pointless floating promise).
process.on("SIGABRT", () => {
  logger.info("SIGABRT received, will force immediate exit on SIGTERM/SIGINT");
  forceTerm = true;
});
// Construct and start the crawl; `crawler` is read by the signal handlers above.
// NOTE(review): run() is async and its promise is left floating here — a
// rejection would surface as an unhandled rejection. Presumably run() handles
// its own errors internally; confirm against crawler.js.
crawler = new Crawler();
crawler.run();