mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-08 06:09:48 +00:00
add test
This commit is contained in:
parent
896059c73b
commit
4c59c6d70e
1 changed file with 15 additions and 0 deletions
15
tests/url-normalize.test.js
Normal file
15
tests/url-normalize.test.js
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
import fs from "fs";
|
||||
import child_process from "child_process";
|
||||
|
||||
// Crawls the same page twice, with its query args given in a different order,
// then checks that pages.jsonl lists the page only once.
test("ensure URLs with same query args but in different order considered same URL", async () => {
  const crawlCommand = "docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url 'https://example-com.webrecorder.net/?A=1&B=2' --url 'https://example-com.webrecorder.net/?B=2&A=1' --collection url-norm-1 --scopeType page";
  child_process.execSync(crawlCommand);

  const pagesFile = "test-crawls/collections/url-norm-1/pages/pages.jsonl";
  const pagesText = fs.readFileSync(pagesFile, "utf8");
  const lines = pagesText.trim().split("\n");

  // Both seed URLs normalize to the same URL, so only one page is crawled:
  // expect 1 page entry plus the 1 header line.
  expect(lines.length).toBe(1 + 1);
});
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue