2020-11-01 19:22:53 -08:00
|
|
|
|
2022-10-24 15:30:10 +02:00
|
|
|
export default async ({data, page, crawler}) => {
|
Per-Seed Scoping Rules + Crawl Depth (#63)
* scoped seeds:
- support per-seed scoping (include + exclude), allowHash, depth, and sitemap options
- support maxDepth per seed #16
- combine --url, --seed and --urlFile/--seedFile urls into a unified seed list
arg parsing:
- simplify seed file options into --seedFile/--urlFile, move option in help display
- rename --maxDepth -> --depth, supported globally and per seed
- ensure custom parsed params from argParser passed back correctly (behaviors, logging, device emulation)
- update to latest js-yaml
- rename --yamlConfig -> --config
- config: support reading config from stdin if --config set to 'stdin'
* scope: fix typo in 'prefix' scope
* update browsertrix-behaviors to 0.2.2
* tests: add test for passing config via stdin, also adding --excludes via cmdline
* update README:
- latest cli, add docs on config via stdin
- rename --yamlConfig -> --config, consolidate --seedFile/--urlFile, move arg position
- info on scoped seeds
- list current scope types
2021-06-26 13:11:29 -07:00
|
|
|
await crawler.loadPage(page, data);
|
2020-11-01 19:22:53 -08:00
|
|
|
};
|