2023-10-23 09:36:10 -07:00
|
|
|
import { exec } from "child_process";
|
|
|
|
import Redis from "ioredis";
|
|
|
|
|
|
|
|
/**
 * Integration test: starts a crawl in Docker, waits until more than one URL
 * is queued, then pushes an "addExclusion" message through Redis and checks
 * that the crawler's debug log reports both the added exclusion and the
 * removal of already-queued URLs.
 *
 * Requires Docker and the webrecorder/browsertrix-crawler image; the
 * container's port 6379 is published on host port 36379.
 */
test("dynamically add exclusion while crawl is running", async () => {
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Resolves with { error, stdout, stderr } once the crawler process exits.
  // It always resolves (never rejects) so the captured output can be
  // inspected even when the process fails.
  const crawlFinished = new Promise((resolve) => {
    try {
      exec(
        "docker run -p 36379:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis",
        { shell: "/bin/bash" },
        (error, stdout, stderr) => resolve({ error, stdout, stderr }),
      );
    } catch (error) {
      // A synchronous failure to spawn must still settle the promise,
      // otherwise the final `await crawlFinished` would hang forever.
      console.log(error);
      resolve({ error, stdout: "", stderr: "" });
    }
  });

  // Give the container a moment to start before connecting to its Redis.
  await sleep(3000);

  // maxRetriesPerRequest is a constructor option; connect() only accepts an
  // optional callback, so passing it there has no effect.
  const redis = new Redis("redis://127.0.0.1:36379/0", {
    lazyConnect: true,
    maxRetriesPerRequest: 50,
  });

  try {
    await redis.connect();

    // Poll until more than one entry is in the "test:q" sorted set, so there
    // is something queued for the exclusion to remove.
    while (Number(await redis.zcard("test:q")) <= 1) {
      await sleep(500);
    }

    const uids = await redis.hkeys("test:status");

    // Fail fast instead of silently pushing to the key "undefined:msg".
    expect(uids.length).toBeGreaterThan(0);

    // exclude all pages containing 'webrecorder', should clear out the queue and end the crawl
    await redis.rpush(
      `${uids[0]}:msg`,
      JSON.stringify({ type: "addExclusion", regex: "webrecorder" }),
    );

    // ensure 'Add Exclusion' is contained in the debug logs
    const { stdout } = await crawlFinished;

    // includes() also accepts a match at index 0, unlike `indexOf(...) > 0`.
    expect(stdout.includes("Add Exclusion")).toBe(true);

    expect(stdout.includes("Removing excluded URL")).toBe(true);
  } finally {
    // Always release the connection, even when an assertion above fails.
    redis.disconnect();
  }
});
|