browsertrix-crawler/tests/limit_reached.test.js
Tessa Walsh 1fcd3b7d6b
Fix failOnFailedLimit and add tests (#580)
Fixes #575

- Adds a missing await to fetching the number of failed pages from Redis
(see the sketch below)
- Fixes a typo in the fatal logging message
- Adds a test to ensure that the crawl fails with exit code 17 if
--failOnInvalidStatus and --failOnFailedLimit 1 are set with a URL that
will 404
2024-05-21 16:35:43 -07:00
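
A minimal sketch of the missing-await fix described in the first bullet.
The names here (crawlState.numFailed(), params.failOnFailedLimit,
logger.fatal()) are assumptions for illustration, not taken from the
actual browsertrix-crawler source:

// Before (buggy): without await, numFailed holds a pending Promise, so the
// >= comparison below always evaluates to false and the limit never fires.
// const numFailed = this.crawlState.numFailed();

// After: await resolves the count from Redis before comparing it
// (names are hypothetical)
const numFailed = await this.crawlState.numFailed();
if (this.params.failOnFailedLimit && numFailed >= this.params.failOnFailedLimit) {
  logger.fatal(
    `Failed page limit of ${this.params.failOnFailedLimit} reached, failing crawl`,
  );
}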

import fs from "fs";
import util from "util";
import { exec as execCallback, execSync } from "child_process";

const exec = util.promisify(execCallback);

test("ensure page limit reached", async () => {
  execSync(
    'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --scopeType prefix --behaviors "" --url https://webrecorder.net/ --limit 12 --workers 2 --collection limit-test --statsFilename stats.json',
  );
});

test("check limit written to stats file is as expected", () => {
  const data = fs.readFileSync("test-crawls/stats.json", "utf8");
  const dataJSON = JSON.parse(data);
  expect(dataJSON.crawled).toEqual(12);
  expect(dataJSON.total).toEqual(12);
  expect(dataJSON.limit.hit).toBe(true);
});

test("ensure crawl fails if failOnFailedLimit is reached", async () => {
  let passed = true;
  try {
    // The first URL 404s; --failOnInvalidStatus marks it as a failed page,
    // so --failOnFailedLimit 1 should abort the crawl with a fatal error
    await exec(
      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached",
    );
  } catch (error) {
    // fatal errors exit the crawler with code 17
    expect(error.code).toEqual(17);
    passed = false;
  }
  // the crawl must have failed, i.e. exec must have thrown
  expect(passed).toBe(false);
});