misc tweaks: (#650)

- logging: log behavior options that are enabled on startup, after seeds
- redis: launch local redis only if --redisStoreUrl is unset (defaulting to
redis://localhost:6379/0) or starts with redis://localhost or redis://127.0.0.1
- interrupt: check that the crawler is not 'done' before exiting with exit
code 13; if it is already done, exit with 0
This commit is contained in:
Ilya Kreymer 2024-07-23 18:50:26 -07:00 committed by GitHub
parent 48716c172d
commit d620eb8e31
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -458,7 +458,14 @@ export class Crawler {
this.proxyServer = initProxy(this.params.proxyServer); this.proxyServer = initProxy(this.params.proxyServer);
subprocesses.push(this.launchRedis()); const redisUrl = this.params.redisStoreUrl || "redis://localhost:6379/0";
if (
redisUrl.startsWith("redis://localhost:") ||
redisUrl.startsWith("redis://127.0.0.1:")
) {
subprocesses.push(this.launchRedis());
}
await fsp.mkdir(this.logDir, { recursive: true }); await fsp.mkdir(this.logDir, { recursive: true });
@ -477,6 +484,8 @@ export class Crawler {
logger.info("Seeds", this.seeds); logger.info("Seeds", this.seeds);
logger.info("Behavior Options", this.params.behaviorOpts);
if (this.params.profile) { if (this.params.profile) {
logger.info("With Browser Profile", { url: this.params.profile }); logger.info("With Browser Profile", { url: this.params.profile });
} }
@ -1242,10 +1251,12 @@ self.__bx_behaviors.selectMainBehavior();
await this.browser.close(); await this.browser.close();
await closeWorkers(0); await closeWorkers(0);
await this.closeFiles(); await this.closeFiles();
await this.setStatusAndExit(13, "interrupted"); if (!this.done) {
} else { await this.setStatusAndExit(13, "interrupted");
await this.setStatusAndExit(0, "done"); return;
}
} }
await this.setStatusAndExit(0, "done");
} }
async isCrawlRunning() { async isCrawlRunning() {