mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 22:43:17 +00:00
84 lines
2.6 KiB
JavaScript
84 lines
2.6 KiB
JavaScript
import { execSync, exec } from "child_process";
|
|
import fs from "fs";
|
|
import { Redis } from "ioredis";
|
|
|
|
|
|
/**
 * Pause for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds before the promise settles.
 * @returns {Promise<void>} Promise that resolves (with no value) after the delay.
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
|
|
|
|
let minioId;
|
|
|
|
// Suite setup: create a dedicated docker network and start a detached MinIO
// container on it, remembering the container id for later `docker exec` /
// `docker kill` calls.
beforeAll(() => {
  execSync("docker network create upload-test-net");

  // `docker run -d` prints the container id followed by a newline; trim it
  // once here so every consumer can interpolate the id directly.
  minioId = execSync(
    "docker run --rm -d -p 9000:9000 -p 9001:9001 --name minio --network=upload-test-net minio/minio server /data --console-address ':9001'",
    { encoding: "utf-8" },
  ).trim();
});
|
|
|
|
|
|
// Suite teardown: stop the MinIO container, then remove the docker network.
afterAll(async () => {
  try {
    // minioId is unset if container startup failed in setup; in that case
    // there is nothing to stop and nothing to wait for.
    if (minioId) {
      execSync(`docker kill -s SIGINT ${minioId.trim()}`);
      // Pause before removing the network — presumably so the container has
      // time to shut down and detach from it.
      await sleep(5000);
    }
  } finally {
    // Always attempt to remove the network, even if stopping the container
    // failed; otherwise the next run fails at `docker network create`.
    execSync("docker network rm upload-test-net");
  }
});
|
|
|
|
// End-to-end check: run a 2-page crawl with WACZ generation and upload to the
// MinIO container, read one page record from the crawler's Redis to learn the
// WACZ filename, then verify the file can be downloaded from the bucket.
test("run crawl with upload", async () => {
  const containerId = minioId.trim();

  execSync(`docker exec ${containerId} mc mb /data/test-bucket`);

  // Start the crawler in the background; its Redis (container port 6379) is
  // published on host port 36390 so this test can read page records from it.
  const child = exec(
    "docker run --rm " +
      "-e STORE_ENDPOINT_URL=http://minio:9000/test-bucket/ " +
      "-e STORE_ACCESS_KEY=minioadmin " +
      "-e STORE_SECRET_KEY=minioadmin " +
      "-e STORE_PATH=prefix/ " +
      "--network=upload-test-net " +
      "-p 36390:6379 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --limit 2 --collection upload-test --crawlId upload-test --writePagesToRedis --debugAccessRedis --generateWACZ",
  );

  // detect crawler exit
  let crawlerExited = false;
  const crawlFinished = new Promise((resolve) => {
    child.on("exit", () => {
      crawlerExited = true;
      resolve();
    });
  });

  // Connection options must be given to the constructor: connect() only
  // accepts an optional callback, so options passed to it are ignored.
  const redis = new Redis("redis://127.0.0.1:36390/0", {
    lazyConnect: true,
    retryStrategy: () => null,
    maxRetriesPerRequest: 50,
  });

  // Property names every page record written to Redis is expected to carry.
  const expectedPageProps = [
    "id",
    "url",
    "ts",
    "title",
    "loadState",
    "filename",
    "depth",
    "seed",
    "favIconUrl",
  ];

  let filename;

  try {
    // Wait before connecting — presumably to let the crawler's Redis start.
    await sleep(3000);

    await redis.connect();

    // Poll until the first page record shows up (or the crawler exits).
    while (!crawlerExited) {
      const res = await redis.lpop("upload-test:pages");
      if (!res) {
        await sleep(100);
        continue;
      }

      const json = JSON.parse(res);
      for (const prop of expectedPageProps) {
        expect(json).toHaveProperty(prop);
      }

      filename = json.filename;
      break;
    }
  } finally {
    // Release the socket even when an assertion above throws.
    redis.disconnect();
  }

  // Fail here with a clear cause if the crawler exited before any page record
  // was read, instead of later fetching ".../prefix/undefined".
  expect(filename).toBeTruthy();

  // ensure bucket is public
  execSync(`docker exec ${containerId} mc config host add local http://127.0.0.1:9000 minioadmin minioadmin`);
  execSync(`docker exec ${containerId} mc anonymous set download local/test-bucket`);

  // wait for crawler to finish
  await crawlFinished;

  // ensure WACZ exists at the specified filename
  const resp = await fetch(`http://127.0.0.1:9000/test-bucket/prefix/${filename}`);
  expect(resp.status).toBe(200);
});
|