diff --git a/docs/docs/user-guide/cli-options.md b/docs/docs/user-guide/cli-options.md
index d819318e..4d68000e 100644
--- a/docs/docs/user-guide/cli-options.md
+++ b/docs/docs/user-guide/cli-options.md
@@ -324,7 +324,7 @@ Options:
                                            the Chrome instance (space-separated or
                                             multiple --extraChromeArgs)
                                                        [array] [default: []]
-      --robots                             If set, fetch and respect page disal
+      --useRobots, --robots                If set, fetch and respect page disal
                                            lows specified in per-host robots.tx
                                            t           [boolean] [default: false]
       --robotsAgent                        Agent to check in addition to '*' fo
diff --git a/src/util/argParser.ts b/src/util/argParser.ts
index f5d6f973..78fd35b9 100644
--- a/src/util/argParser.ts
+++ b/src/util/argParser.ts
@@ -705,7 +705,8 @@ class ArgParser {
         default: [],
       },
 
-      robots: {
+      useRobots: {
+        alias: "robots",
         describe:
           "If set, fetch and respect page disallows specified in per-host robots.txt",
         type: "boolean",
diff --git a/tests/robots_txt.test.js b/tests/robots_txt.test.js
index a181d016..e57f07c6 100644
--- a/tests/robots_txt.test.js
+++ b/tests/robots_txt.test.js
@@ -2,7 +2,7 @@ import child_process from "child_process";
 
 test("test robots.txt is fetched and cached", async () => {
   const res = child_process.execSync(
-    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --robots --logging debug",
+    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --useRobots --logging debug",
   );
 
   const log = res.toString();
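
For context, a minimal standalone sketch of how the yargs `alias` added in the `argParser.ts` hunk behaves: with `alias: "robots"`, both `--useRobots` and the legacy `--robots` spelling populate the same parsed value. This is not the crawler's actual `ArgParser`; the surrounding script, file name, and console output are illustrative assumptions.

```ts
// Illustrative only: a tiny standalone yargs parser (not the crawler's ArgParser)
// showing that the alias makes --useRobots and --robots equivalent.
import yargs from "yargs";
import { hideBin } from "yargs/helpers";

const parsed = yargs(hideBin(process.argv))
  .options({
    useRobots: {
      alias: "robots",
      describe:
        "If set, fetch and respect page disallows specified in per-host robots.txt",
      type: "boolean",
      default: false,
    },
  })
  .parseSync();

// With the alias, either spelling sets both keys on the parsed result:
//   node parse.js --useRobots  -> parsed.useRobots === true, parsed.robots === true
//   node parse.js --robots     -> parsed.useRobots === true, parsed.robots === true
console.log(parsed.useRobots, parsed.robots);
```

Because the alias keeps the old `--robots` spelling parseable, only the canonical flag in the docs and in the test command line needed updating.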