mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
add logging option (#29)
* add --pywb-log command-line flag, which enables pywb logging to stdout/stderr Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local> Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
This commit is contained in:
parent
fb0f1d8db9
commit
9ef3f25416
1 changed file with 15 additions and 4 deletions
19
crawler.js
19
crawler.js
|
@ -107,14 +107,20 @@ class Crawler {
|
|||
}
|
||||
|
||||
bootstrap() {
|
||||
const opts = {stdio: "ignore", cwd: this.params.cwd};
|
||||
let opts = {}
|
||||
if (this.params.pywb_log) {
|
||||
opts = {stdio: "inherit", cwd: this.params.cwd};
|
||||
}
|
||||
else{
|
||||
opts = {stdio: "ignore", cwd: this.params.cwd};
|
||||
}
|
||||
|
||||
this.configureUA();
|
||||
|
||||
this.headers = {"User-Agent": this.userAgent};
|
||||
|
||||
child_process.spawn("redis-server", {...opts, cwd: "/tmp/"});
|
||||
|
||||
|
||||
child_process.spawnSync("wb-manager", ["init", this.params.collection], opts);
|
||||
|
||||
opts.env = {...process.env, COLL: this.params.collection};
|
||||
|
@ -222,6 +228,12 @@ class Crawler {
|
|||
default: false,
|
||||
},
|
||||
|
||||
"pywb-log": {
|
||||
describe: "If set, generate pywb log file",
|
||||
type: "boolean",
|
||||
default: false,
|
||||
},
|
||||
|
||||
"text": {
|
||||
describe: "If set, extract text to the pages.jsonl file",
|
||||
type: "boolean",
|
||||
|
@ -420,7 +432,7 @@ class Crawler {
|
|||
if (this.params.text){
|
||||
const client = await page.target().createCDPSession();
|
||||
const result = await client.send("DOM.getDocument", {"depth": -1, "pierce": true});
|
||||
text = await new TextExtract(result).parseTextFromDom()
|
||||
text = await new TextExtract(result).parseTextFromDom();
|
||||
}
|
||||
|
||||
this.writePage(data.url, title, this.params.text, text);
|
||||
|
@ -717,4 +729,3 @@ class Crawler {
|
|||
}
|
||||
|
||||
module.exports.Crawler = Crawler;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue