mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-11-01 04:30:53 +00:00
Store crawler start and end times in Redis lists (#397)
* Store crawler start and end times in Redis lists * end time tweaks: - set end time for logger.fatal() - set missing start time in setEndTime() --------- Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
parent
f453dbfb56
commit
a23f840318
3 changed files with 41 additions and 3 deletions
14
crawler.js
14
crawler.js
|
|
@ -337,7 +337,7 @@ export class Crawler {
|
||||||
|
|
||||||
await this.closeLog();
|
await this.closeLog();
|
||||||
|
|
||||||
process.exit(exitCode);
|
await this.setEndTimeAndExit(exitCode);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -712,10 +712,18 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async setEndTimeAndExit(exitCode = 0) {
|
||||||
|
if (this.crawlState) {
|
||||||
|
await this.crawlState.setEndTime();
|
||||||
|
}
|
||||||
|
process.exit(exitCode);
|
||||||
|
}
|
||||||
|
|
||||||
async serializeAndExit() {
|
async serializeAndExit() {
|
||||||
await this.serializeConfig();
|
await this.serializeConfig();
|
||||||
await this.closeLog();
|
await this.closeLog();
|
||||||
process.exit(this.interrupted ? 13 : 0);
|
|
||||||
|
await this.setEndTimeAndExit(this.interrupted ? 13 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
async isCrawlRunning() {
|
async isCrawlRunning() {
|
||||||
|
|
@ -746,6 +754,8 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
|
|
||||||
await this.initCrawlState();
|
await this.initCrawlState();
|
||||||
|
|
||||||
|
await this.crawlState.setStartTime();
|
||||||
|
|
||||||
let initState = await this.crawlState.getStatus();
|
let initState = await this.crawlState.getStatus();
|
||||||
|
|
||||||
while (initState === "debug") {
|
while (initState === "debug") {
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,12 @@ class Logger
|
||||||
|
|
||||||
fatal(message, data={}, context="general", exitCode=17) {
|
fatal(message, data={}, context="general", exitCode=17) {
|
||||||
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
|
this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
|
||||||
process.exit(exitCode);
|
|
||||||
|
if (this.crawlState) {
|
||||||
|
this.crawlState.setEndTime().finally(process.exit(exitCode));
|
||||||
|
} else {
|
||||||
|
process.exit(exitCode);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,9 @@ export class RedisCrawlState
|
||||||
this.fkey = this.key + ":f";
|
this.fkey = this.key + ":f";
|
||||||
// crawler errors
|
// crawler errors
|
||||||
this.ekey = this.key + ":e";
|
this.ekey = this.key + ":e";
|
||||||
|
// start and end times to compute execution minutes
|
||||||
|
this.startkey = this.key + ":start";
|
||||||
|
this.endkey = this.key + ":end";
|
||||||
|
|
||||||
this._initLuaCommands(this.redis);
|
this._initLuaCommands(this.redis);
|
||||||
}
|
}
|
||||||
|
|
@ -183,6 +186,26 @@ return 0;
|
||||||
return new Date().toISOString();
|
return new Date().toISOString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async setStartTime() {
|
||||||
|
const startTime = this._timestamp();
|
||||||
|
return await this.redis.rpush(`${this.startkey}:${this.uid}`, startTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
async getStartTimes() {
|
||||||
|
return await this.redis.lrange(`${this.startkey}:${this.uid}`, 0, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
async setEndTime() {
|
||||||
|
// Set start time if crawler exits before it was able to set one
|
||||||
|
if (!await this.redis.llen(`${this.startkey}:${this.uid}`)) {
|
||||||
|
await this.setStartTime();
|
||||||
|
}
|
||||||
|
|
||||||
|
const endTime = this._timestamp();
|
||||||
|
|
||||||
|
return await this.redis.rpush(`${this.endkey}:${this.uid}`, endTime);
|
||||||
|
}
|
||||||
|
|
||||||
async markStarted(url) {
|
async markStarted(url) {
|
||||||
const started = this._timestamp();
|
const started = this._timestamp();
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue