2021-03-01 19:00:23 -05:00
|
|
|
const fs = require("fs");
|
2021-03-31 13:41:27 -04:00
|
|
|
const md5 = require("md5");
|
2021-03-01 19:00:23 -05:00
|
|
|
|
|
|
|
|
2021-03-31 13:41:27 -04:00
|
|
|
// Verifies that pages.jsonl is present in the pages folder of the wr-net collection.
test("check that the pages.jsonl file exists in the collection under the pages folder", () => {
  const collectionPagesPath = "crawls/collections/wr-net/pages/pages.jsonl";
  const isPresent = fs.existsSync(collectionPagesPath);

  expect(isPresent).toBe(true);
});
|
|
|
|
|
2021-03-31 13:41:27 -04:00
|
|
|
// Verifies that pages.jsonl is present in the pages folder under the wr-net wacz directory.
test("check that the pages.jsonl file exists in the wacz under the pages folder", () => {
  const waczPagesPath = "crawls/collections/wr-net/wacz/pages/pages.jsonl";
  const isPresent = fs.existsSync(waczPagesPath);

  expect(isPresent).toBe(true);
});
|
|
|
|
|
2021-03-31 13:41:27 -04:00
|
|
|
// Confirms that the same page text is recorded in the collection's pages
// folder, in the unzipped wacz folder, and in the stored fixture, by comparing
// md5 hashes of the "text" field.
test("check that the hash in the pages folder and in the unzipped wacz folders match", () => {
  // Returns the md5 of the "text" field of the JSON record found on the second
  // line of a .jsonl file. Line index 0 is skipped — presumably a header record
  // rather than a page entry (TODO confirm against the pages.jsonl format).
  const hashSecondRecordText = (jsonlPath) => {
    const lines = fs.readFileSync(jsonlPath, "utf8").split("\n");
    const record = JSON.parse(lines[1]);
    return md5(record["text"]);
  };

  // Each name matches the path it is read from, so a failing assertion points
  // at the right artifact.
  const waczHash = hashSecondRecordText("crawls/collections/wr-net/wacz/pages/pages.jsonl");
  const collectionHash = hashSecondRecordText("crawls/collections/wr-net/pages/pages.jsonl");
  const fixtureHash = hashSecondRecordText("tests/fixtures/pages.jsonl");

  expect(collectionHash).toEqual(fixtureHash);
  expect(collectionHash).toEqual(waczHash);
});
|
|
|
|
|