2023-09-15 00:16:19 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import child_process from "child_process";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import fs from "fs";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("ensure that stats file is modified", async () => {
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  const child = child_process.exec(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --limit 3 --collection file-stats --statsFilename progress.json",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  );
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  // detect crawler exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  let crawler_exited = false;
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  child.on("exit", function () {
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    crawler_exited = true;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  });
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // helper function to sleep
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  const sleep = (ms) => new Promise((res) => setTimeout(res, ms));
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // wait for stats file creation up to 30 secs (to not wait indefinitely)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  let counter = 0;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  while (!fs.existsSync("test-crawls/progress.json")) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    await sleep(100);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    counter++;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    expect(counter < 300).toBe(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // get initial modification time
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  const initial_mtime = fs.fstatSync(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fs.openSync("test-crawls/progress.json", "r"),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  ).mtime;
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // wait for crawler exit
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  while (!crawler_exited) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    await sleep(100);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // get final modification time
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  const final_mtime = fs.fstatSync(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fs.openSync("test-crawls/progress.json", "r"),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  ).mtime;
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  // compare initial and final modification time
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const diff = Math.abs(final_mtime - initial_mtime);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(diff > 0).toBe(true);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("check that stats file format is correct", () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const data = fs.readFileSync("test-crawls/progress.json", "utf8");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const dataJSON = JSON.parse(data);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.crawled).toEqual(3);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.total).toEqual(3);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.pending).toEqual(0);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.failed).toEqual(0);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.limit.max).toEqual(3);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.limit.hit).toBe(true);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(dataJSON.pendingPages.length).toEqual(0);
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								});
							 |