2021-07-07 18:56:52 -04:00
|
|
|
const fs = require("fs");
|
|
|
|
const zlib = require("zlib");
|
|
|
|
const child_process = require("child_process");
|
|
|
|
|
|
|
|
/**
 * End-to-end check of the warcinfo record written by the crawler.
 *
 * Runs the browsertrix-crawler Docker image against the crawl-2.yaml fixture
 * (config piped in on stdin), then gunzips the resulting WARC and asserts that
 * the expected warcinfo fields appear in it.
 *
 * The 30s budget is passed as the third argument to `test()`; calling
 * `jest.setTimeout()` from inside a running test does not affect that test.
 */
test("check that the warcinfo file works as expected on the command line", () => {
  try {
    const configYaml = fs.readFileSync("tests/fixtures/crawl-2.yaml", "utf8");

    // `input` is written to the child's stdin (Node forces stdio[0] to a pipe
    // whenever `input` is given), which is what `--config stdin` reads from.
    const crawlOutput = child_process.execSync(
      "docker run -i -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --config stdin --limit 1 --collection warcinfo --combineWARC",
      { input: configYaml, encoding: "utf8" },
    );

    console.log(crawlOutput);
  } catch (error) {
    // Best effort: a failing crawl run is logged rather than rethrown, and the
    // assertions below decide the outcome based on the WARC found on disk.
    console.log(error);
  }

  const warcData = fs.readFileSync("test-crawls/collections/warcinfo/warcinfo_0.warc.gz");
  const warcText = zlib.gunzipSync(warcData).toString("utf8");

  expect(warcText).toContain("operator: test");
  expect(warcText).toContain("host: hostname");
  expect(warcText).toMatch(/Browsertrix-Crawler \d[\w.-]+ \(with warcio.js \d[\w.-]+ pywb \d[\w.-]+\)/);
  expect(warcText).toContain("format: WARC File Format 1.0");
}, 30000);
|