mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00

replace webrecorder.net -> old.webrecorder.net to fix tests relying on old website for now
37 lines
1.2 KiB
JavaScript
37 lines
1.2 KiB
JavaScript
import fs from "fs";
|
|
import { execSync } from "child_process";
|
|
|
|
/**
 * Integration test: crawl a URL that carries a `gclid` query parameter and
 * verify, from the generated CDXJ index, that the crawl recorded
 *   - a 307 entry for the original URL (with `gclid`), and
 *   - a 200 entry for the same URL without the parameter.
 */
test("check that gclid query URL is automatically redirected to remove it", () => {
  const trackedUrl = "https://old.webrecorder.net/about?gclid=abc";
  const cleanUrl = "https://old.webrecorder.net/about";

  try {
    execSync(
      "docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://old.webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX",
    );
  } catch (error) {
    // Best effort: a failing crawl is only logged here; the index checks
    // below decide whether the test passes.
    // execSync was called without an encoding, so stderr is a Buffer.
    // Decode it so the log shows readable text instead of `<Buffer ...>`.
    const stderrText = error.stderr ? error.stderr.toString() : "";
    console.log(stderrText || error.message);
  }

  const indexText = fs.readFileSync(
    "test-crawls/collections/test-brave-redir/indexes/index.cdxj",
    { encoding: "utf-8" },
  );

  let redirectFound = false;
  let responseFound = false;

  for (const entry of indexText.trim().split("\n")) {
    // The first two space-separated fields are skipped; the remainder of
    // the line is the JSON payload.
    const payload = entry.split(" ").slice(2).join(" ");
    if (!payload) {
      // Empty index or blank line: nothing to parse. Let the assertions
      // below report the missing records rather than a JSON syntax error.
      continue;
    }

    const record = JSON.parse(payload);
    if (record.url === trackedUrl && record.status === "307") {
      redirectFound = true;
    } else if (record.url === cleanUrl && record.status === "200") {
      responseFound = true;
    }

    if (redirectFound && responseFound) {
      break;
    }
  }

  // Asserted separately so a failure shows which record is missing.
  expect(redirectFound).toBe(true);
  expect(responseFound).toBe(true);
});
|