misc typos/fixes for 0.3.0:

- update README with latest params
- ensure capture dir includes seconds
- bump behaviors to 0.1.1
Ilya Kreymer 2021-04-13 18:16:37 -07:00
parent b59788ea04
commit eff4c61270
4 changed files with 16 additions and 11 deletions

README.md

@@ -46,7 +46,7 @@ Browsertrix Crawler includes a number of additional command-line options, explai
The Browsertrix Crawler docker image currently accepts the following parameters:
```
-browsertrix-crawler [options]
+crawler [options]
Options:
--help Show help [boolean]
@@ -74,7 +74,7 @@ Options:
-c, --collection Collection name to crawl to (replay
will be accessible under this name
in pywb preview)
-[string] [default: "capture-2021-04-10T04-49-4"]
+[string] [default: "capture-YYYY-MM-DDTHH-MM-SS"]
--headless Run in headless mode, otherwise
start xvfb [boolean] [default: false]
--driver JS driver for the crawler
@@ -82,10 +82,15 @@ Options:
--generateCDX, --generatecdx, If set, generate index (CDXJ) for
--generateCdx use with pywb after crawl is done
[boolean] [default: false]
+--combineWARC, --combinewarc, If set, combine the warcs
+--combineWarc [boolean] [default: false]
+--rolloverSize If set, declare the rollover size
+[number] [default: 1000000000]
--generateWACZ, --generatewacz, If set, generate wacz
--generateWacz [boolean] [default: false]
--logging Logging options for crawler, can
-include: stats, pywb, behaviors
+include: stats, pywb, behaviors,
+behaviors-debug
[string] [default: "stats"]
--text If set, extract text to the
pages.jsonl file
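For reference, a hedged example of how the newly documented flags could be combined in a single run; the `--url` seed option and the comma-separated `--logging` value are assumptions and do not appear in this diff:

```
crawler --url https://example.com/ --collection my-crawl \
  --combineWARC --rolloverSize 1000000000 \
  --generateWACZ --logging stats,behaviors-debug
```

Presumably `--rolloverSize` sets the byte threshold at which the combined WARC rolls over to a new file, but that detail is not spelled out in this diff.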

crawler.js

@@ -48,7 +48,7 @@ class Crawler {
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
const params = require("yargs")
.usage("browsertrix-crawler [options]")
.usage("crawler [options]")
.option(this.cliOpts)
.check((argv) => this.validateArgs(argv)).argv;
@@ -193,7 +193,7 @@ class Crawler {
alias: "c",
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
type: "string",
-      default: `capture-${new Date().toISOString().slice(0,18)}`.replace(/:/g, "-")
+      default: `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
},
"headless": {
@@ -236,7 +236,7 @@ class Crawler {
},
"logging": {
describe: "Logging options for crawler, can include: stats, pywb, behaviors",
describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug",
type: "string",
default: "stats",
},
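The default-collection change above is the "ensure capture dir includes seconds" fix from the commit message: `toISOString()` returns a string like `2021-04-13T18:16:37.123Z`, so slicing 18 characters kept only one digit of the seconds. A minimal sketch of the before/after behavior (the sample timestamp is illustrative):

```
const iso = "2021-04-13T18:16:37.123Z";  // shape of new Date().toISOString()

iso.slice(0, 18);  // "2021-04-13T18:16:3"  - old default, seconds cut short
iso.slice(0, 19);  // "2021-04-13T18:16:37" - new default, full seconds

// ":" is then replaced with "-" for a filesystem-safe collection directory name:
`capture-${iso.slice(0, 19)}`.replace(/:/g, "-");  // "capture-2021-04-13T18-16-37"
```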

package.json

@@ -7,7 +7,7 @@
"license": "MIT",
"dependencies": {
"abort-controller": "^3.0.0",
"browsertrix-behaviors": "^0.1.0",
"browsertrix-behaviors": "^0.1.1",
"node-fetch": "^2.6.1",
"puppeteer-cluster": "^0.22.0",
"puppeteer-core": "^5.3.1",

yarn.lock

@@ -1046,10 +1046,10 @@ browserslist@^4.14.5:
escalade "^3.1.1"
node-releases "^1.1.70"
-browsertrix-behaviors@^0.1.0:
-  version "0.1.0"
-  resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.0.tgz#202aabac6dcc2b15fe4777c3cc99d3d0cc042191"
-  integrity sha512-AfED59t8b7couu5Vzcy76BoWqCyHtYfmaR5t8ic1MoSfzz40d5WS4HfZqUWvOcoqsUfpJhjlc9R7nCptpQ6tNQ==
+browsertrix-behaviors@^0.1.1:
+  version "0.1.1"
+  resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.1.tgz#9b1b44698a6742ecb369a93354837f01307113d2"
+  integrity sha512-x7BbuUy3y0yVrXjbNGJT9uS5Pk1nWE9N/2ovre9z7ldV7tzjZBkWGkE14W2iZsuj4mMpJlnpIsAJMcOaYYTowA==
bser@2.1.1:
version "2.1.1"