From ff5619e6240de9720d6e5ab52d4660133a41dfcc Mon Sep 17 00:00:00 2001
From: Tessa Walsh
Date: Tue, 2 Dec 2025 18:55:25 -0500
Subject: [PATCH] Rename robots flag to --useRobots, keep --robots as alias
 (#932)

Follow-up to https://github.com/webrecorder/browsertrix-crawler/issues/631

Based on feedback from https://github.com/webrecorder/browsertrix/pull/3029

Renaming `--robots` to `--useRobots` will allow us to keep the Browsertrix
backend API more consistent with similar flags like `--useSitemap`. Keeping
`--robots` as it's a nice shorthand alias.
---
 docs/docs/user-guide/cli-options.md | 2 +-
 src/util/argParser.ts               | 3 ++-
 tests/robots_txt.test.js            | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/docs/user-guide/cli-options.md b/docs/docs/user-guide/cli-options.md
index d819318e..4d68000e 100644
--- a/docs/docs/user-guide/cli-options.md
+++ b/docs/docs/user-guide/cli-options.md
@@ -324,7 +324,7 @@ Options:
                                        the Chrome instance (space-separated
                                         or multiple --extraChromeArgs)
                                                       [array] [default: []]
-      --robots                        If set, fetch and respect page disal
+      --useRobots, --robots           If set, fetch and respect page disal
                                        lows specified in per-host robots.tx
                                        t          [boolean] [default: false]
       --robotsAgent                    Agent to check in addition to '*' fo
diff --git a/src/util/argParser.ts b/src/util/argParser.ts
index f5d6f973..78fd35b9 100644
--- a/src/util/argParser.ts
+++ b/src/util/argParser.ts
@@ -705,7 +705,8 @@ class ArgParser {
       default: [],
     },
 
-    robots: {
+    useRobots: {
+      alias: "robots",
       describe:
         "If set, fetch and respect page disallows specified in per-host robots.txt",
       type: "boolean",
diff --git a/tests/robots_txt.test.js b/tests/robots_txt.test.js
index a181d016..e57f07c6 100644
--- a/tests/robots_txt.test.js
+++ b/tests/robots_txt.test.js
@@ -2,7 +2,7 @@ import child_process from "child_process";
 
 test("test robots.txt is fetched and cached", async () => {
   const res = child_process.execSync(
-    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --robots --logging debug",
+    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://webrecorder.net/ --scopeType page --useRobots --logging debug",
  );
 
   const log = res.toString();
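For reference, here is a minimal standalone sketch of how the `alias` field in the `argParser.ts` change behaves in yargs (the option parser this patch modifies). This is an illustrative example, not code from the repository, and assumes yargs v17's API:

```ts
import yargs from "yargs";
import { hideBin } from "yargs/helpers";

// Define the option under its canonical name, with the old flag as an
// alias, mirroring the argParser.ts hunk above.
const argv = yargs(hideBin(process.argv))
  .option("useRobots", {
    alias: "robots",
    describe:
      "If set, fetch and respect page disallows specified in per-host robots.txt",
    type: "boolean",
    default: false,
  })
  .parseSync();

// yargs populates both keys for an aliased option, so `--robots` and
// `--useRobots` are interchangeable on the command line and in the
// parsed result:
console.log(argv.useRobots, argv.robots);
```

Running this with either `--robots` or `--useRobots` sets both `argv.useRobots` and `argv.robots` to `true`, which is why the test can switch to `--useRobots` while existing `--robots` invocations keep working unchanged.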