mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00

Some page elements don't quite respond correctly if the element is not in view, so should add the setEnsureElementIsInTheViewport() to click, doubleclick, hover and change step locators.
99 lines
2.5 KiB
JavaScript
99 lines
2.5 KiB
JavaScript
import child_process from "child_process";
|
|
import fs from "fs";
|
|
|
|
test("test custom selector crawls JS files as pages", async () => {
|
|
try {
|
|
child_process.execSync(
|
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --collection custom-sel-1 --selectLinks \"script[src]->src\"",
|
|
);
|
|
} catch (error) {
|
|
console.log(error);
|
|
}
|
|
|
|
const crawledPages = fs.readFileSync(
|
|
"test-crawls/collections/custom-sel-1/pages/pages.jsonl",
|
|
"utf8",
|
|
);
|
|
const pages = new Set();
|
|
|
|
for (const line of crawledPages.trim().split("\n")) {
|
|
const url = JSON.parse(line).url;
|
|
if (!url) {
|
|
continue;
|
|
}
|
|
pages.add(url);
|
|
}
|
|
|
|
const crawledExtraPages = fs.readFileSync(
|
|
"test-crawls/collections/custom-sel-1/pages/extraPages.jsonl",
|
|
"utf8",
|
|
);
|
|
const extraPages = new Set();
|
|
|
|
for (const line of crawledExtraPages.trim().split("\n")) {
|
|
const url = JSON.parse(line).url;
|
|
if (!url) {
|
|
continue;
|
|
}
|
|
extraPages.add(url);
|
|
}
|
|
|
|
const expectedPages = new Set([
|
|
"https://www.iana.org/",
|
|
]);
|
|
|
|
const expectedExtraPages = new Set([
|
|
"https://www.iana.org/static/_js/jquery.js",
|
|
"https://www.iana.org/static/_js/iana.js",
|
|
]);
|
|
|
|
expect(pages).toEqual(expectedPages);
|
|
expect(extraPages).toEqual(expectedExtraPages);
|
|
});
|
|
|
|
|
|
test("test invalid selector, crawl fails", async () => {
|
|
let status = 0;
|
|
try {
|
|
child_process.execSync(
|
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://www.iana.org/ --collection custom-sel-invalid --selectLinks \"script[\"",
|
|
);
|
|
} catch (e) {
|
|
status = e.status;
|
|
}
|
|
|
|
// logger fatal exit code
|
|
expect(status).toBe(17);
|
|
});
|
|
|
|
test("test valid autoclick selector passes validation", async () => {
|
|
let failed = false;
|
|
|
|
try {
|
|
child_process.execSync(
|
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector button --scopeType page",
|
|
);
|
|
} catch (e) {
|
|
failed = true;
|
|
}
|
|
|
|
// valid clickSelector
|
|
expect(failed).toBe(false);
|
|
});
|
|
|
|
|
|
test("test invalid autoclick selector fails validation, crawl fails", async () => {
|
|
let status = 0;
|
|
|
|
try {
|
|
child_process.execSync(
|
|
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector \",\" --scopeType page",
|
|
);
|
|
} catch (e) {
|
|
status = e.status;
|
|
}
|
|
|
|
// logger fatal exit code
|
|
expect(status).toBe(17);
|
|
});
|
|
|