misc typos/fixes for 0.3.0:

- update README with latest params
- ensure capture dir includes seconds
- bump behaviors to 0.1.1
This commit is contained in:
Ilya Kreymer 2021-04-13 18:16:37 -07:00
parent b59788ea04
commit eff4c61270
4 changed files with 16 additions and 11 deletions

View file

@ -46,7 +46,7 @@ Browsertrix Crawler includes a number of additional command-line options, explai
The Browsertrix Crawler docker image currently accepts the following parameters: The Browsertrix Crawler docker image currently accepts the following parameters:
``` ```
browsertrix-crawler [options] crawler [options]
Options: Options:
--help Show help [boolean] --help Show help [boolean]
@ -74,7 +74,7 @@ Options:
-c, --collection Collection name to crawl to (replay -c, --collection Collection name to crawl to (replay
will be accessible under this name will be accessible under this name
in pywb preview) in pywb preview)
[string] [default: "capture-2021-04-10T04-49-4"] [string] [default: "capture-YYYY-MM-DDTHH-MM-SS"]
--headless Run in headless mode, otherwise --headless Run in headless mode, otherwise
start xvfb[boolean] [default: false] start xvfb[boolean] [default: false]
--driver JS driver for the crawler --driver JS driver for the crawler
@ -82,10 +82,15 @@ Options:
--generateCDX, --generatecdx, If set, generate index (CDXJ) for --generateCDX, --generatecdx, If set, generate index (CDXJ) for
--generateCdx use with pywb after crawl is done --generateCdx use with pywb after crawl is done
[boolean] [default: false] [boolean] [default: false]
--combineWARC, --combinewarc, If set, combine the warcs
--combineWarc [boolean] [default: false]
--rolloverSize If set, declare the rollover size
[number] [default: 1000000000]
--generateWACZ, --generatewacz, If set, generate wacz --generateWACZ, --generatewacz, If set, generate wacz
--generateWacz [boolean] [default: false] --generateWacz [boolean] [default: false]
--logging Logging options for crawler, can --logging Logging options for crawler, can
include: stats, pywb, behaviors include: stats, pywb, behaviors,
behaviors-debug
[string] [default: "stats"] [string] [default: "stats"]
--text If set, extract text to the --text If set, extract text to the
pages.jsonl file pages.jsonl file

View file

@ -48,7 +48,7 @@ class Crawler {
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-")); this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
const params = require("yargs") const params = require("yargs")
.usage("browsertrix-crawler [options]") .usage("crawler [options]")
.option(this.cliOpts) .option(this.cliOpts)
.check((argv) => this.validateArgs(argv)).argv; .check((argv) => this.validateArgs(argv)).argv;
@ -193,7 +193,7 @@ class Crawler {
alias: "c", alias: "c",
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)", describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
type: "string", type: "string",
default: `capture-${new Date().toISOString().slice(0,18)}`.replace(/:/g, "-") default: `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
}, },
"headless": { "headless": {
@ -236,7 +236,7 @@ class Crawler {
}, },
"logging": { "logging": {
describe: "Logging options for crawler, can include: stats, pywb, behaviors", describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug",
type: "string", type: "string",
default: "stats", default: "stats",
}, },

View file

@ -7,7 +7,7 @@
"license": "MIT", "license": "MIT",
"dependencies": { "dependencies": {
"abort-controller": "^3.0.0", "abort-controller": "^3.0.0",
"browsertrix-behaviors": "^0.1.0", "browsertrix-behaviors": "^0.1.1",
"node-fetch": "^2.6.1", "node-fetch": "^2.6.1",
"puppeteer-cluster": "^0.22.0", "puppeteer-cluster": "^0.22.0",
"puppeteer-core": "^5.3.1", "puppeteer-core": "^5.3.1",

View file

@ -1046,10 +1046,10 @@ browserslist@^4.14.5:
escalade "^3.1.1" escalade "^3.1.1"
node-releases "^1.1.70" node-releases "^1.1.70"
browsertrix-behaviors@^0.1.0: browsertrix-behaviors@^0.1.1:
version "0.1.0" version "0.1.1"
resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.0.tgz#202aabac6dcc2b15fe4777c3cc99d3d0cc042191" resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.1.1.tgz#9b1b44698a6742ecb369a93354837f01307113d2"
integrity sha512-AfED59t8b7couu5Vzcy76BoWqCyHtYfmaR5t8ic1MoSfzz40d5WS4HfZqUWvOcoqsUfpJhjlc9R7nCptpQ6tNQ== integrity sha512-x7BbuUy3y0yVrXjbNGJT9uS5Pk1nWE9N/2ovre9z7ldV7tzjZBkWGkE14W2iZsuj4mMpJlnpIsAJMcOaYYTowA==
bser@2.1.1: bser@2.1.1:
version "2.1.1" version "2.1.1"