mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-07 21:59:48 +00:00
Rename robots flag to --useRobots, keep --robots as alias (#932)
Follow-up to https://github.com/webrecorder/browsertrix-crawler/issues/631, based on feedback from https://github.com/webrecorder/browsertrix/pull/3029. Renaming `--robots` to `--useRobots` will allow us to keep the Browsertrix backend API more consistent with similar flags like `--useSitemap`. We keep `--robots` as an alias, since it's a nice shorthand.
This commit is contained in:
parent
2914e93152
commit
ff5619e624
3 changed files with 4 additions and 3 deletions
|
|
@@ -324,7 +324,7 @@ Options:
|
||||||
the Chrome instance (space-separated
|
the Chrome instance (space-separated
|
||||||
or multiple --extraChromeArgs)
|
or multiple --extraChromeArgs)
|
||||||
[array] [default: []]
|
[array] [default: []]
|
||||||
--robots If set, fetch and respect page disal
|
--useRobots, --robots If set, fetch and respect page disal
|
||||||
lows specified in per-host robots.tx
|
lows specified in per-host robots.tx
|
||||||
t [boolean] [default: false]
|
t [boolean] [default: false]
|
||||||
--robotsAgent Agent to check in addition to '*' fo
|
--robotsAgent Agent to check in addition to '*' fo
|
||||||
|
|
|
||||||
|
|
@@ -705,7 +705,8 @@ class ArgParser {
|
||||||
default: [],
|
default: [],
|
||||||
},
|
},
|
||||||
|
|
||||||
robots: {
|
useRobots: {
|
||||||
|
alias: "robots",
|
||||||
describe:
|
describe:
|
||||||
"If set, fetch and respect page disallows specified in per-host robots.txt",
|
"If set, fetch and respect page disallows specified in per-host robots.txt",
|
||||||
type: "boolean",
|
type: "boolean",
|
||||||
|
|
|
||||||
|
|
@@ -2,7 +2,7 @@ import child_process from "child_process";
|
||||||
|
|
||||||
test("test robots.txt is fetched and cached", async () => {
|
test("test robots.txt is fetched and cached", async () => {
|
||||||
const res = child_process.execSync(
|
const res = child_process.execSync(
|
||||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --robots --logging debug",
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --useRobots --logging debug",
|
||||||
);
|
);
|
||||||
|
|
||||||
const log = res.toString();
|
const log = res.toString();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue