| 
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 |  |  | import child_process from "child_process"; | 
					
						
							|  |  |  | import fs from "fs"; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  | test("ensure that stats file is modified", async () => { | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   const child = child_process.exec( | 
					
						
							| 
									
										
										
										
											2024-10-31 10:24:58 -07:00
										 |  |  |     "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --generateWACZ --text --limit 3 --exclude community --collection file-stats --statsFilename progress.json", | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   ); | 
					
						
							| 
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  |   // detect crawler exit
 | 
					
						
							|  |  |  |   let crawler_exited = false; | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   child.on("exit", function () { | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  |     crawler_exited = true; | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // helper function to sleep
 | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   const sleep = (ms) => new Promise((res) => setTimeout(res, ms)); | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |   // wait for stats file creation up to 30 secs (to not wait indefinitely)
 | 
					
						
							|  |  |  |   let counter = 0; | 
					
						
							|  |  |  |   while (!fs.existsSync("test-crawls/progress.json")) { | 
					
						
							|  |  |  |     await sleep(100); | 
					
						
							|  |  |  |     counter++; | 
					
						
							|  |  |  |     expect(counter < 300).toBe(true); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // get initial modification time
 | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   const initial_mtime = fs.fstatSync( | 
					
						
							|  |  |  |     fs.openSync("test-crawls/progress.json", "r"), | 
					
						
							|  |  |  |   ).mtime; | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |   // wait for crawler exit
 | 
					
						
							|  |  |  |   while (!crawler_exited) { | 
					
						
							|  |  |  |     await sleep(100); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // get final modification time
 | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   const final_mtime = fs.fstatSync( | 
					
						
							|  |  |  |     fs.openSync("test-crawls/progress.json", "r"), | 
					
						
							|  |  |  |   ).mtime; | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |   // compare initial and final modification time
 | 
					
						
							|  |  |  |   const diff = Math.abs(final_mtime - initial_mtime); | 
					
						
							|  |  |  |   expect(diff > 0).toBe(true); | 
					
						
							| 
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 |  |  | }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | test("check that stats file format is correct", () => { | 
					
						
							|  |  |  |   const data = fs.readFileSync("test-crawls/progress.json", "utf8"); | 
					
						
							|  |  |  |   const dataJSON = JSON.parse(data); | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  |   expect(dataJSON.crawled).toEqual(3); | 
					
						
							|  |  |  |   expect(dataJSON.total).toEqual(3); | 
					
						
							| 
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 |  |  |   expect(dataJSON.pending).toEqual(0); | 
					
						
							|  |  |  |   expect(dataJSON.failed).toEqual(0); | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  |   expect(dataJSON.limit.max).toEqual(3); | 
					
						
							|  |  |  |   expect(dataJSON.limit.hit).toBe(true); | 
					
						
							| 
									
										
										
										
											2023-09-15 00:16:19 +02:00
										 |  |  |   expect(dataJSON.pendingPages.length).toEqual(0); | 
					
						
							| 
									
										
										
										
											2023-09-15 18:34:56 +02:00
										 |  |  | }); |