2024-06-25 15:48:22 -07:00
|
|
|
import fs from "fs";
|
|
|
|
import { execSync } from "child_process";
|
|
|
|
|
|
|
|
// Runs a one-page crawl of a URL carrying a `gclid` query parameter, then
// scans the generated index file for two records: a 307 for the original URL
// and a 200 for the same URL without the query string.
test("check that gclid query URL is automatically redirected to remove it", async () => {
  const crawlCommand =
    "docker run --rm -v $PWD/test-crawls:/crawls -i webrecorder/browsertrix-crawler crawl --url 'https://old.webrecorder.net/about?gclid=abc' --collection test-brave-redir --behaviors \"\" --limit 1 --generateCDX";

  try {
    execSync(crawlCommand);
  } catch (error) {
    // A failing crawl command is only logged here; the index checks below
    // still decide whether the test passes.
    console.log(error.stderr);
  }

  const indexText = fs.readFileSync(
    "test-crawls/collections/test-brave-redir/indexes/index.cdxj",
    { encoding: "utf-8" },
  );

  let sawRedirect = false;
  let sawResponse = false;

  // `some` stops at the first entry for which the callback returns true,
  // i.e. as soon as both records have been seen; later lines are not parsed.
  indexText
    .trim()
    .split("\n")
    .some((entry) => {
      // The first two space-separated fields are skipped; everything after
      // them is re-joined and parsed as the JSON payload of the line.
      const [, , ...payloadParts] = entry.split(" ");
      const record = JSON.parse(payloadParts.join(" "));

      // Status values are compared as strings.
      if (
        record.url === "https://old.webrecorder.net/about?gclid=abc" &&
        record.status === "307"
      ) {
        sawRedirect = true;
      } else if (
        record.url === "https://old.webrecorder.net/about" &&
        record.status === "200"
      ) {
        sawResponse = true;
      }

      return sawResponse && sawRedirect;
    });

  expect(sawRedirect && sawResponse).toBe(true);
});
|