| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  | import { exec } from "child_process"; | 
					
						
							|  |  |  | import Redis from "ioredis"; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  | function sleep(ms) { | 
					
						
							|  |  |  |   return new Promise((resolve) => setTimeout(resolve, ms)); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  | test("dynamically add exclusion while crawl is running", async () => { | 
					
						
							|  |  |  |   let callback = null; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const p = new Promise((resolve) => { | 
					
						
							|  |  |  |     callback = (error, stdout, stderr) => { | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |       resolve({ error, stdout, stderr }); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  |     }; | 
					
						
							|  |  |  |   }); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   try { | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |     exec( | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  |       "docker run -p 36382:6379 -e CRAWL_ID=test -v $PWD/test-crawls:/crawls -v $PWD/tests/fixtures:/tests/fixtures webrecorder/browsertrix-crawler crawl --collection add-exclusion --url https://webrecorder.net/ --scopeType prefix --limit 20 --logging debug --debugAccessRedis", | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |       { shell: "/bin/bash" }, | 
					
						
							|  |  |  |       callback, | 
					
						
							|  |  |  |     ); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  |   } catch (error) { | 
					
						
							|  |  |  |     console.log(error); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  |   await sleep(3000); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  |   const redis = new Redis("redis://127.0.0.1:36382/0", { lazyConnect: true, retryStrategy: () => null }) | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  |   await redis.connect(); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |   while (true) { | 
					
						
							|  |  |  |     if (Number(await redis.zcard("test:q")) > 1) { | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  |     await sleep(500); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   const uids = await redis.hkeys("test:status"); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   // exclude all pages containing 'webrecorder', should clear out the queue and end the crawl
 | 
					
						
							| 
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 |  |  |   await redis.rpush( | 
					
						
							|  |  |  |     `${uids[0]}:msg`, | 
					
						
							|  |  |  |     JSON.stringify({ type: "addExclusion", regex: "webrecorder" }), | 
					
						
							|  |  |  |   ); | 
					
						
							| 
									
										
										
										
											2023-10-23 09:36:10 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |   // ensure 'Add Exclusion is contained in the debug logs
 | 
					
						
							|  |  |  |   const { stdout } = await p; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   expect(stdout.indexOf("Add Exclusion") > 0).toBe(true); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   expect(stdout.indexOf("Removing excluded URL") > 0).toBe(true); | 
					
						
							|  |  |  | }); | 
					
						
							| 
									
										
										
										
											2024-03-22 17:32:42 -07:00
										 |  |  | 
 |