Mirror of https://github.com/webrecorder/browsertrix-crawler.git, synced 2025-10-19 14:33:17 +00:00

- Refactors args parsing so that `Crawler.params` is properly typed with CLI options + additions via the `CrawlerArgs` type.
- Also adds typing to the create-login-profile CLI options.
- Validation is still done without typing due to yargs limitations.
- Tests: exclude a slow page from tests for faster test runs.
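For context, here is a minimal sketch of the general pattern the commit describes: declaring CLI options with yargs, doing per-option validation at runtime (yargs cannot enforce this through the type system), and casting the parsed result to a structured args type. The names `CrawlerArgsSketch` and `parseArgsSketch` are illustrative only, not the repo's actual `CrawlerArgs` implementation; it assumes a yargs v17-style API.

import yargs from "yargs";
import { hideBin } from "yargs/helpers";

// Hypothetical subset of a CrawlerArgs-like type (illustrative, not the real type)
type CrawlerArgsSketch = {
  url?: string;
  limit: number;
  dryRun: boolean;
  collection: string;
};

function parseArgsSketch(argv: string[]): CrawlerArgsSketch {
  const parsed = yargs(argv)
    .option("url", { type: "string", describe: "Seed URL to crawl" })
    .option("limit", { type: "number", default: 0, describe: "Max pages to crawl" })
    .option("dryRun", { type: "boolean", default: false, describe: "Skip writing archive data" })
    .option("collection", { type: "string", default: "crawl", describe: "Collection name" })
    // Validation still happens at runtime, without compile-time typing
    .check((args) => {
      if (args.limit < 0) {
        throw new Error("--limit must be >= 0");
      }
      return true;
    })
    .parseSync();

  // Cast the loosely typed yargs result to the structured args type
  return parsed as CrawlerArgsSketch;
}

const params = parseArgsSketch(hideBin(process.argv));
console.log(params.collection, params.dryRun);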
18 lines · 676 B · JavaScript
import child_process from "child_process";
import fs from "fs";

test("ensure dryRun crawl only writes pages and logs", async () => {
  // Run a dry-run crawl: pages and logs are written, but no WARC/WACZ output
  child_process.execSync(
    'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" --warcPrefix custom-prefix --dryRun --exclude community',
  );

  // The collection directory should contain only the "logs" and "pages" subdirectories
  const files = fs.readdirSync("test-crawls/collections/dry-run-wr-net").sort();

  expect(files.length).toBe(2);
  expect(files[0]).toBe("logs");
  expect(files[1]).toBe("pages");
});