Store crawler start and end times in Redis lists (#397)

* Store crawler start and end times in Redis lists

* end time tweaks:
- set end time for logger.fatal()
- set a missing start time in setEndTime()

---------
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Tessa Walsh 2023-10-02 20:55:52 -04:00 committed by GitHub
parent f453dbfb56
commit a23f840318
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 3 deletions

View file

@ -337,7 +337,7 @@ export class Crawler {
await this.closeLog();
process.exit(exitCode);
await this.setEndTimeAndExit(exitCode);
}
}
@ -712,10 +712,18 @@ self.__bx_behaviors.selectMainBehavior();
}
}
async setEndTimeAndExit(exitCode = 0) {
if (this.crawlState) {
await this.crawlState.setEndTime();
}
process.exit(exitCode);
}
async serializeAndExit() {
await this.serializeConfig();
await this.closeLog();
process.exit(this.interrupted ? 13 : 0);
await this.setEndTimeAndExit(this.interrupted ? 13 : 0);
}
async isCrawlRunning() {
@ -746,6 +754,8 @@ self.__bx_behaviors.selectMainBehavior();
await this.initCrawlState();
await this.crawlState.setStartTime();
let initState = await this.crawlState.getStatus();
while (initState === "debug") {

View file

@ -103,7 +103,12 @@ class Logger
fatal(message, data={}, context="general", exitCode=17) {
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
process.exit(exitCode);
if (this.crawlState) {
this.crawlState.setEndTime().finally(process.exit(exitCode));
} else {
process.exit(exitCode);
}
}
}

View file

@ -68,6 +68,9 @@ export class RedisCrawlState
this.fkey = this.key + ":f";
// crawler errors
this.ekey = this.key + ":e";
// start and end times to compute execution minutes
this.startkey = this.key + ":start";
this.endkey = this.key + ":end";
this._initLuaCommands(this.redis);
}
@ -183,6 +186,26 @@ return 0;
return new Date().toISOString();
}
async setStartTime() {
const startTime = this._timestamp();
return await this.redis.rpush(`${this.startkey}:${this.uid}`, startTime);
}
async getStartTimes() {
return await this.redis.lrange(`${this.startkey}:${this.uid}`, 0, -1);
}
async setEndTime() {
// Set start time if crawler exits before it was able to set one
if (!await this.redis.llen(`${this.startkey}:${this.uid}`)) {
await this.setStartTime();
}
const endTime = this._timestamp();
return await this.redis.rpush(`${this.endkey}:${this.uid}`, endTime);
}
async markStarted(url) {
const started = this._timestamp();