mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Store crawler start and end times in Redis lists (#397)
* Store crawler start and end times in Redis lists
* end time tweaks:
  - set end time for logger.fatal()
  - set missing start time into setEndTime()
---------
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
parent
f453dbfb56
commit
a23f840318
3 changed files with 41 additions and 3 deletions
14
crawler.js
14
crawler.js
|
@ -337,7 +337,7 @@ export class Crawler {
|
|||
|
||||
await this.closeLog();
|
||||
|
||||
process.exit(exitCode);
|
||||
await this.setEndTimeAndExit(exitCode);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -712,10 +712,18 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
}
|
||||
}
|
||||
|
||||
async setEndTimeAndExit(exitCode = 0) {
|
||||
if (this.crawlState) {
|
||||
await this.crawlState.setEndTime();
|
||||
}
|
||||
process.exit(exitCode);
|
||||
}
|
||||
|
||||
async serializeAndExit() {
|
||||
await this.serializeConfig();
|
||||
await this.closeLog();
|
||||
process.exit(this.interrupted ? 13 : 0);
|
||||
|
||||
await this.setEndTimeAndExit(this.interrupted ? 13 : 0);
|
||||
}
|
||||
|
||||
async isCrawlRunning() {
|
||||
|
@ -746,6 +754,8 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
|
||||
await this.initCrawlState();
|
||||
|
||||
await this.crawlState.setStartTime();
|
||||
|
||||
let initState = await this.crawlState.getStatus();
|
||||
|
||||
while (initState === "debug") {
|
||||
|
|
|
@ -103,7 +103,12 @@ class Logger
|
|||
|
||||
fatal(message, data={}, context="general", exitCode=17) {
|
||||
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
|
||||
process.exit(exitCode);
|
||||
|
||||
if (this.crawlState) {
|
||||
this.crawlState.setEndTime().finally(process.exit(exitCode));
|
||||
} else {
|
||||
process.exit(exitCode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,6 +68,9 @@ export class RedisCrawlState
|
|||
this.fkey = this.key + ":f";
|
||||
// crawler errors
|
||||
this.ekey = this.key + ":e";
|
||||
// start and end times to compute execution minutes
|
||||
this.startkey = this.key + ":start";
|
||||
this.endkey = this.key + ":end";
|
||||
|
||||
this._initLuaCommands(this.redis);
|
||||
}
|
||||
|
@ -183,6 +186,26 @@ return 0;
|
|||
return new Date().toISOString();
|
||||
}
|
||||
|
||||
async setStartTime() {
|
||||
const startTime = this._timestamp();
|
||||
return await this.redis.rpush(`${this.startkey}:${this.uid}`, startTime);
|
||||
}
|
||||
|
||||
async getStartTimes() {
|
||||
return await this.redis.lrange(`${this.startkey}:${this.uid}`, 0, -1);
|
||||
}
|
||||
|
||||
async setEndTime() {
|
||||
// Set start time if crawler exits before it was able to set one
|
||||
if (!await this.redis.llen(`${this.startkey}:${this.uid}`)) {
|
||||
await this.setStartTime();
|
||||
}
|
||||
|
||||
const endTime = this._timestamp();
|
||||
|
||||
return await this.redis.rpush(`${this.endkey}:${this.uid}`, endTime);
|
||||
}
|
||||
|
||||
async markStarted(url) {
|
||||
const started = this._timestamp();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue