mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Add option to output stats file live, i.e. after each page crawled (#374)
* Add option to output stats file live, i.e. after each page crawled
* Always output stats files after each page crawled (+ test)
* Fix inversion between expected and test value
This commit is contained in:
parent
afecec01bd
commit
d72443ced3
3 changed files with 28 additions and 4 deletions
24
tests/file_stats.test.js
Normal file
24
tests/file_stats.test.js
Normal file
|
@@ -0,0 +1,24 @@
|
|||
import child_process from "child_process";
|
||||
import fs from "fs";
|
||||
|
||||
|
||||
// Runs the crawler image in Docker against a single URL, passing
// `--statsFilename progress.json`, with the host directory $PWD/test-crawls
// mounted at /crawls in the container.
// There is no explicit expect() here: the test passes as long as execSync
// does not throw.
test("ensure crawl run with docker with stats file passes", async () => {
|
||||
// execSync blocks until the command finishes and throws if it exits non-zero.
// `$PWD` is expanded by the shell that execSync spawns, so this relies on the
// test runner's working directory (presumably the repository root — confirm).
child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url http://www.example.com/ --generateWACZ --text --collection file-stats --statsFilename progress.json");
|
||||
|
||||
});
|
||||
|
||||
// Asserts that test-crawls/progress.json is present on the host filesystem.
// NOTE(review): the file is presumably produced by the Docker crawl test
// earlier in this file, so this test depends on that one having run first.
test("check that a stats file exists", () => {
|
||||
expect(fs.existsSync("test-crawls/progress.json")).toBe(true);
|
||||
});
|
||||
|
||||
// Reads test-crawls/progress.json, parses it as JSON, and checks the value
// of each expected field.
// readFileSync / JSON.parse throw if the file is missing or is not valid
// JSON, which also fails the test.
test("check that stats file format is correct", () => {
|
||||
const data = fs.readFileSync("test-crawls/progress.json", "utf8");
|
||||
const dataJSON = JSON.parse(data);
|
||||
// Expected counters: one page crawled out of one total, nothing pending or
// failed (presumably because the crawl targeted a single URL — confirm).
expect(dataJSON.crawled).toEqual(1);
|
||||
expect(dataJSON.total).toEqual(1);
|
||||
expect(dataJSON.pending).toEqual(0);
|
||||
expect(dataJSON.failed).toEqual(0);
|
||||
// `limit` is expected to be an object with `max` and `hit` fields.
// NOTE(review): max === 0 presumably means "no page limit set" — confirm.
expect(dataJSON.limit.max).toEqual(0);
|
||||
expect(dataJSON.limit.hit).toBe(false);
|
||||
// `pendingPages` must have a length of 0 (presumably an empty array).
expect(dataJSON.pendingPages.length).toEqual(0);
|
||||
});
|
Loading…
Add table
Add a link
Reference in a new issue