browsertrix-crawler/tests/extra_hops_depth.test.js
Tessa Walsh 0192d05f4c Implement improved json-l logging
- Add Logger class with methods for info, error, warn, debug, fatal
- Add context, timestamp, and details fields to log entries
- Log messages as JSON Lines
- Replace puppeteer-cluster stats with custom stats implementation
- Log behaviors by default
- Amend argParser to reflect logging changes
- Capture and log stdout/stderr from awaited child_processes
- Modify tests to use webrecorder.net to avoid timeouts
2023-01-19 14:17:27 -05:00

36 lines
1.1 KiB
JavaScript

import fs from "fs";
import util from "util";
import {exec as execCallback } from "child_process";
// Promise-returning wrapper around child_process.exec so shell commands can be awaited.
const exec = util.promisify(execCallback);
/**
 * Integration test: runs browsertrix-crawler in Docker against
 * https://webrecorder.net/ with `--extraHops 2` and `--limit 7`, then checks
 * that every URL recorded in the collection's pages.jsonl is one of the
 * expected pages.
 */
test("check that URLs are crawled 2 extra hops beyond depth", async () => {
  try {
    await exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection extra-hops-beyond --extraHops 2 --url https://webrecorder.net/ --limit 7");
  } catch (error) {
    // A failing command is logged rather than rethrown; the assertions below
    // then run against whatever pages.jsonl is on disk.
    console.log(error);
  }

  const crawledPages = fs.readFileSync("test-crawls/collections/extra-hops-beyond/pages/pages.jsonl", "utf8");

  const expectedPages = [
    "https://webrecorder.net/",
    "https://webrecorder.net/blog",
    "https://webrecorder.net/tools",
    "https://webrecorder.net/community",
    "https://webrecorder.net/about",
    "https://webrecorder.net/contact",
    "https://webrecorder.net/faq",
  ];

  // One JSON document per line; lines without a `url` field are skipped.
  const crawledUrls = crawledPages
    .trim()
    .split("\n")
    .map((line) => JSON.parse(line).url)
    .filter((url) => Boolean(url));

  // Guard against a vacuous pass: without this, a file containing no page
  // entries would satisfy the loop below without asserting anything.
  expect(crawledUrls.length).toBeGreaterThan(0);

  for (const url of crawledUrls) {
    // toContain reports the unexpected URL on failure, unlike a bare boolean check.
    expect(expectedPages).toContain(url);
  }
});