mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
support pause interrupt: (#825)
- add a new interrupt reason / exit code (CrawlPaused)
- add isCrawlPaused(), which checks the redis <id>:paused key
- exit gracefully and upload the WACZ file when the crawl is paused

Fixes #824.
This commit is contained in:
parent
f9bd534e4c
commit
e39d5a31eb
3 changed files with 16 additions and 6 deletions
|
@ -1497,6 +1497,10 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
}
|
||||
}
|
||||
|
||||
if (await this.crawlState.isCrawlPaused()) {
|
||||
interrupt = InterruptReason.CrawlPaused;
|
||||
}
|
||||
|
||||
if (interrupt) {
|
||||
this.uploadAndDeleteLocal = true;
|
||||
this.gracefulFinishOnInterrupt(interrupt);
|
||||
|
@ -1859,12 +1863,9 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
if (isFinished || (await this.crawlState.isCrawlCanceled())) {
|
||||
return;
|
||||
}
|
||||
// if stopped, won't get anymore data
|
||||
if (await this.crawlState.isCrawlStopped()) {
|
||||
// possibly restarted after committing, so assume done here!
|
||||
if ((await this.crawlState.numDone()) > 0) {
|
||||
return;
|
||||
}
|
||||
// possibly restarted after committing, so assume done here!
|
||||
if ((await this.crawlState.numDone()) > 0) {
|
||||
return;
|
||||
}
|
||||
// fail crawl otherwise
|
||||
logger.fatal("No WARC Files, assuming crawl failed");
|
||||
|
|
|
@ -88,4 +88,5 @@ export enum InterruptReason {
|
|||
DiskUtilization = 4,
|
||||
BrowserCrashed = 5,
|
||||
SignalInterrupted = 6,
|
||||
CrawlPaused = 7,
|
||||
}
|
||||
|
|
|
@ -521,6 +521,14 @@ return inx;
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether the crawl is currently flagged as paused.
 *
 * Reads the `<key>:paused` entry from redis; the crawl counts as paused
 * only when the stored value is exactly the string "1". Any other value,
 * including a missing key, yields false.
 *
 * @returns true when the paused flag is set to "1", false otherwise.
 */
async isCrawlPaused() {
  const pausedFlag = await this.redis.get(`${this.key}:paused`);
  return pausedFlag === "1";
}
|
||||
|
||||
/**
 * Reports whether the crawl has been flagged as canceled.
 *
 * Reads the `<key>:canceled` entry from redis and compares it against
 * the string "1"; any other value, including a missing key, yields false.
 *
 * @returns true when the canceled flag is set to "1", false otherwise.
 */
async isCrawlCanceled() {
  const canceledFlag = await this.redis.get(`${this.key}:canceled`);
  return canceledFlag === "1";
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue