add locking mechanism for deleteOnExit and running post-crawl steps

only run post-crawl steps on the last instance, when all locks are released
This commit is contained in:
Ilya Kreymer 2022-03-16 04:16:33 +00:00
parent 59c1bd5626
commit c0508d44a7
2 changed files with 16 additions and 3 deletions

View file

@ -29,6 +29,7 @@ const { S3StorageSync, getFileSize } = require("./util/storage");
const { ScreenCaster, WSTransport, RedisPubSubTransport } = require("./util/screencaster");
const { parseArgs } = require("./util/argParser");
const { initRedis } = require("./util/redis");
const { Lock } = require("./util/lock");
const { getBrowserExe, loadProfile, evaluateWithCLI } = require("./util/browser");
@ -89,6 +90,9 @@ class Crawler {
this.pagesFile = path.join(this.pagesDir, "pages.jsonl");
this.blockRules = null;
// lock is obtained if last/single process in parallel runs
this.lock = this.params.deleteOnExit ? new Lock(this.collDir) : null;
}
statusLog(...args) {
@ -217,9 +221,13 @@ class Crawler {
subprocesses.push(child_process.spawn("uwsgi", [path.join(__dirname, "uwsgi.ini")], opts));
process.on("exit", (code) => {
if (this.params.deleteOnExit && (code === 0 || code === 1)) {
if (this.params.deleteOnExit && this.lock.release() && (code === 0 || code === 3)) {
console.log(`Deleting ${this.collDir} before exit`);
fs.rmSync(this.collDir, { recursive: true, force: true });
try {
fs.rmSync(this.collDir, { recursive: true, force: true });
} catch(e) {
console.warn(e);
}
}
for (const proc of subprocesses) {
@ -461,6 +469,11 @@ class Crawler {
// extra wait for all resources to land into WARCs
await this.awaitPendingClear();
if (this.lock && !this.lock.release()) {
this.statusLog("Exiting, other parallel jobs will finish...");
return;
}
if (this.params.combineWARC) {
await this.combineWARC();
}

View file

@ -35,7 +35,7 @@ process.on("SIGINT", async () => {
process.on("SIGTERM", async () => {
if (forceTerm) {
console.log("SIGTERM received, exit immediately");
process.exit(1);
process.exit(3);
}
console.log("SIGTERM received...");