mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
misc typos/fixes for 0.3.0:
- update README with latest params
- ensure capture dir includes seconds
- bump behaviors to 0.1.1
This commit is contained in:
parent
b59788ea04
commit
eff4c61270
4 changed files with 16 additions and 11 deletions
11
README.md
11
README.md
|
@ -46,7 +46,7 @@ Browsertrix Crawler includes a number of additional command-line options, explai
|
||||||
The Browsertrix Crawler docker image currently accepts the following parameters:
|
The Browsertrix Crawler docker image currently accepts the following parameters:
|
||||||
|
|
||||||
```
|
```
|
||||||
browsertrix-crawler [options]
|
crawler [options]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--help Show help [boolean]
|
--help Show help [boolean]
|
||||||
|
@ -74,7 +74,7 @@ Options:
|
||||||
-c, --collection Collection name to crawl to (replay
|
-c, --collection Collection name to crawl to (replay
|
||||||
will be accessible under this name
|
will be accessible under this name
|
||||||
in pywb preview)
|
in pywb preview)
|
||||||
[string] [default: "capture-2021-04-10T04-49-4"]
|
[string] [default: "capture-YYYY-MM-DDTHH-MM-SS"]
|
||||||
--headless Run in headless mode, otherwise
|
--headless Run in headless mode, otherwise
|
||||||
start xvfb[boolean] [default: false]
|
start xvfb[boolean] [default: false]
|
||||||
--driver JS driver for the crawler
|
--driver JS driver for the crawler
|
||||||
|
@ -82,10 +82,15 @@ Options:
|
||||||
--generateCDX, --generatecdx, If set, generate index (CDXJ) for
|
--generateCDX, --generatecdx, If set, generate index (CDXJ) for
|
||||||
--generateCdx use with pywb after crawl is done
|
--generateCdx use with pywb after crawl is done
|
||||||
[boolean] [default: false]
|
[boolean] [default: false]
|
||||||
|
--combineWARC, --combinewarc, If set, combine the warcs
|
||||||
|
--combineWarc [boolean] [default: false]
|
||||||
|
--rolloverSize If set, declare the rollover size
|
||||||
|
[number] [default: 1000000000]
|
||||||
--generateWACZ, --generatewacz, If set, generate wacz
|
--generateWACZ, --generatewacz, If set, generate wacz
|
||||||
--generateWacz [boolean] [default: false]
|
--generateWacz [boolean] [default: false]
|
||||||
--logging Logging options for crawler, can
|
--logging Logging options for crawler, can
|
||||||
include: stats, pywb, behaviors
|
include: stats, pywb, behaviors,
|
||||||
|
behaviors-debug
|
||||||
[string] [default: "stats"]
|
[string] [default: "stats"]
|
||||||
--text If set, extract text to the
|
--text If set, extract text to the
|
||||||
pages.jsonl file
|
pages.jsonl file
|
||||||
|
|
|
@ -48,7 +48,7 @@ class Crawler {
|
||||||
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
|
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
|
||||||
|
|
||||||
const params = require("yargs")
|
const params = require("yargs")
|
||||||
.usage("browsertrix-crawler [options]")
|
.usage("crawler [options]")
|
||||||
.option(this.cliOpts)
|
.option(this.cliOpts)
|
||||||
.check((argv) => this.validateArgs(argv)).argv;
|
.check((argv) => this.validateArgs(argv)).argv;
|
||||||
|
|
||||||
|
@ -193,7 +193,7 @@ class Crawler {
|
||||||
alias: "c",
|
alias: "c",
|
||||||
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
|
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
|
||||||
type: "string",
|
type: "string",
|
||||||
default: `capture-${new Date().toISOString().slice(0,18)}`.replace(/:/g, "-")
|
default: `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
|
||||||
},
|
},
|
||||||
|
|
||||||
"headless": {
|
"headless": {
|
||||||
|
@ -236,7 +236,7 @@ class Crawler {
|
||||||
},
|
},
|
||||||
|
|
||||||
"logging": {
|
"logging": {
|
||||||
describe: "Logging options for crawler, can include: stats, pywb, behaviors",
|
describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug",
|
||||||
type: "string",
|
type: "string",
|
||||||
default: "stats",
|
default: "stats",
|
||||||
},
|
},
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"abort-controller": "^3.0.0",
|
"abort-controller": "^3.0.0",
|
||||||
"browsertrix-behaviors": "^0.1.0",
|
"browsertrix-behaviors": "^0.1.1",
|
||||||
"node-fetch": "^2.6.1",
|
"node-fetch": "^2.6.1",
|
||||||
"puppeteer-cluster": "^0.22.0",
|
"puppeteer-cluster": "^0.22.0",
|
||||||
"puppeteer-core": "^5.3.1",
|
"puppeteer-core": "^5.3.1",
|
||||||
|
|
|
@ -1046,10 +1046,10 @@ browserslist@^4.14.5:
|
||||||
escalade "^3.1.1"
|
escalade "^3.1.1"
|
||||||
node-releases "^1.1.70"
|
node-releases "^1.1.70"
|
||||||
|
|
||||||
browsertrix-behaviors@^0.1.0:
|
browsertrix-behaviors@^0.1.1:
|
||||||
version "0.1.0"
|
version "0.1.1"
|
||||||
resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.0.tgz#202aabac6dcc2b15fe4777c3cc99d3d0cc042191"
|
resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.1.tgz#9b1b44698a6742ecb369a93354837f01307113d2"
|
||||||
integrity sha512-AfED59t8b7couu5Vzcy76BoWqCyHtYfmaR5t8ic1MoSfzz40d5WS4HfZqUWvOcoqsUfpJhjlc9R7nCptpQ6tNQ==
|
integrity sha512-x7BbuUy3y0yVrXjbNGJT9uS5Pk1nWE9N/2ovre9z7ldV7tzjZBkWGkE14W2iZsuj4mMpJlnpIsAJMcOaYYTowA==
|
||||||
|
|
||||||
bser@2.1.1:
|
bser@2.1.1:
|
||||||
version "2.1.1"
|
version "2.1.1"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue