mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00

Adds support for autoclick behavior: - Adds new `autoclick` behavior option to `--behaviors`, but does not enable it by default - Adds support for new exposed function `__bx_addSet` which allows autoclick behavior to persist state about links that have already been clicked to avoid duplicates, only used if link has an href - Adds a new pageFinished flag on the worker state. - Adds an on('dialog') handler to reject onbeforeunload page navigations, when in behavior (page not finished), but accept when page is finished - to allow navigation away only when behaviors are done - Update to browsertrix-behaviors 0.7.0, which supports autoclick - Add --clickSelector option to customize elements that will be clicked, defaulting to `a`. - Add --linkSelector as alias for --selectLinks for consistency - Unknown options for --behaviors are printed as warnings, instead of causing a hard exit, for forward compatibility with new behavior types in the future Fixes #728, also #216, #665, #31
28 lines
879 B
JavaScript
28 lines
879 B
JavaScript
import child_process from "child_process";
|
|
import fs from "fs";
|
|
|
|
test("set rollover to 500K and ensure individual WARCs rollover, including screenshots", async () => {
|
|
child_process.execSync(
|
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 5 --exclude community --collection rollover-500K --rolloverSize 500000 --screenshot view --logging debug"
|
|
);
|
|
|
|
const warcLists = fs.readdirSync("test-crawls/collections/rollover-500K/archive");
|
|
|
|
let main = 0;
|
|
let screenshots = 0;
|
|
|
|
for (const name of warcLists) {
|
|
if (name.startsWith("rec-")) {
|
|
main++;
|
|
} else if (name.startsWith("screenshots-")) {
|
|
screenshots++;
|
|
}
|
|
}
|
|
|
|
// expect at least 6 main WARCs
|
|
expect(main).toBeGreaterThan(5);
|
|
|
|
// expect at least 2 screenshot WARCs
|
|
expect(screenshots).toBeGreaterThan(1);
|
|
|
|
});
|