add logging option (#29)

* add a --pywb-log command-line flag that enables pywb logging to stdout/stderr

Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local>
Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
This commit is contained in:
Emma Dickson 2021-03-04 15:36:58 -05:00 committed by GitHub
parent fb0f1d8db9
commit 9ef3f25416
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -107,14 +107,20 @@ class Crawler {
}
bootstrap() {
const opts = {stdio: "ignore", cwd: this.params.cwd};
let opts = {}
if (this.params.pywb_log) {
opts = {stdio: "inherit", cwd: this.params.cwd};
}
else{
opts = {stdio: "ignore", cwd: this.params.cwd};
}
this.configureUA();
this.headers = {"User-Agent": this.userAgent};
child_process.spawn("redis-server", {...opts, cwd: "/tmp/"});
child_process.spawnSync("wb-manager", ["init", this.params.collection], opts);
opts.env = {...process.env, COLL: this.params.collection};
@ -222,6 +228,12 @@ class Crawler {
default: false,
},
"pywb-log": {
describe: "If set, generate pywb log file",
type: "boolean",
default: false,
},
"text": {
describe: "If set, extract text to the pages.jsonl file",
type: "boolean",
@ -420,7 +432,7 @@ class Crawler {
if (this.params.text){
const client = await page.target().createCDPSession();
const result = await client.send("DOM.getDocument", {"depth": -1, "pierce": true});
text = await new TextExtract(result).parseTextFromDom()
text = await new TextExtract(result).parseTextFromDom();
}
this.writePage(data.url, title, this.params.text, text);
@ -717,4 +729,3 @@ class Crawler {
}
module.exports.Crawler = Crawler;