support pause interrupt: (#825)

- add new interrupt reason / exit code
- add isCrawlPaused() which checks redis <id>:paused key
- exit gracefully, upload WACZ file when paused

fixes #824
This commit is contained in:
Ilya Kreymer 2025-05-05 10:10:08 -07:00 committed by GitHub
parent f9bd534e4c
commit e39d5a31eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 16 additions and 6 deletions

View file

@ -1497,6 +1497,10 @@ self.__bx_behaviors.selectMainBehavior();
}
}
if (await this.crawlState.isCrawlPaused()) {
interrupt = InterruptReason.CrawlPaused;
}
if (interrupt) {
this.uploadAndDeleteLocal = true;
this.gracefulFinishOnInterrupt(interrupt);
@ -1859,12 +1863,9 @@ self.__bx_behaviors.selectMainBehavior();
if (isFinished || (await this.crawlState.isCrawlCanceled())) {
return;
}
// if stopped, won't get anymore data
if (await this.crawlState.isCrawlStopped()) {
// possibly restarted after committing, so assume done here!
if ((await this.crawlState.numDone()) > 0) {
return;
}
// possibly restarted after committing, so assume done here!
if ((await this.crawlState.numDone()) > 0) {
return;
}
// fail crawl otherwise
logger.fatal("No WARC Files, assuming crawl failed");

View file

@ -88,4 +88,5 @@ export enum InterruptReason {
DiskUtilization = 4,
BrowserCrashed = 5,
SignalInterrupted = 6,
CrawlPaused = 7,
}

View file

@ -521,6 +521,14 @@ return inx;
return false;
}
/**
 * Reports whether the pause flag is currently set for this crawl.
 *
 * Reads the `${this.key}:paused` entry from redis and resolves to `true`
 * only when the stored value is exactly the string "1"; any other result
 * resolves to `false`.
 */
async isCrawlPaused() {
  const pausedFlag = await this.redis.get(`${this.key}:paused`);
  return pausedFlag === "1";
}
/**
 * Reports whether the cancel flag is currently set for this crawl.
 *
 * Reads the `${this.key}:canceled` entry from redis and resolves to `true`
 * only when the stored value is exactly the string "1"; any other result
 * resolves to `false`.
 */
async isCrawlCanceled() {
  const canceledKey = `${this.key}:canceled`;
  const canceledFlag = await this.redis.get(canceledKey);
  return canceledFlag === "1";
}