2023-11-09 11:27:11 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import { parseArgs } from "../dist/util/argParser.js";
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-10-24 15:30:10 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import fs from "fs";
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								function getSeeds(config) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const orig = fs.readFileSync;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  fs.readFileSync = (name, ...args) => {
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-23 18:31:43 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    if (name.endsWith("/stdinconfig")) {
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      return config;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return orig(name, ...args);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  };
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												State Save + Restore State from Config + Redis State + Scope Fix 0.5.0 (#78)
* save state work:
- support interrupting and saving crawl
- support loading crawl state (frontier queue, pending, done) from YAML
- support scope check when loading to apply new scoping rules when restarting crawl
- failed urls added to done as failed, can be retried if crawl is stopped and restarted
- save state to crawls/crawl-<ts>-<id>.yaml when interrupted
- --saveState option controls when crawl state is saved, default to partial/when interrupted, also always, never.
- support in-memory or redis based crawl state, using fork of puppeteer-cluster
- --redisStore used to enable redis-based state
* signals/crawl interruption:
- crawl state set to drain/not provide any more urls to crawl
- graceful stop of crawl in response to sigint/sigterm
- initial sigint/sigterm waits for graceful end of current pages, second terminates immediately
- initial sigabrt followed by sigterm terminates immediately
- puppeteer disable handleSIGTERM, handleSIGHUP, handleSIGINT
* redis state support:
- use lua scripts for atomic move from queue -> pending, and pending -> done
- pending key expiry set to page timeout
- add numPending() and numSeen() to support better puppeteer-cluster semantics for early termination
- drainMax returns the numPending() + numSeen() to work with cluster stats
* arg improvements:
- add --crawlId param, also settable via CRAWL_ID env var, defaulting to os.hostname() (used for redis key and crawl state file)
- support setting cmdline args via env var CRAWL_ARGS
- use 'choices' in args when possible
* build update:
- switch base browser image to new webrecorder/browsertrix-browser-base, simple image with .deb files only for amd64 and arm64 builds
- use setuptools<58.0
* misc crawl/scoping rule fixes:
- scoping rules fix when external is used with scopeType
state:
- limit: ensure no urls, including initial seeds, are added past the limit
- signals: fix immediate shutdown on second signal
- tests: add scope test for default scope + excludes
*  py-wacz update
- add 'seed': true to pages that are seeds for optimized wacz creation, keeping non-seeds separate (supported via wacz 0.3.2)
- pywb: use latest pywb branch for improved twitter video capture
* update to latest browsertrix-behaviors
* fix setuptools dependency #88
* update README for 0.5.0 beta
											
										 
										
											2021-09-28 09:41:16 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  const res = parseArgs(["node", "crawler", "--config", "stdinconfig"]);
							 | 
						
					
						
							
								
									
										
										
										
											2024-09-05 18:10:27 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  return res.scopedSeeds;
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("default scope", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("prefix");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												State Save + Restore State from Config + Redis State + Scope Fix 0.5.0 (#78)
* save state work:
- support interrupting and saving crawl
- support loading crawl state (frontier queue, pending, done) from YAML
- support scope check when loading to apply new scoping rules when restarting crawl
- failed urls added to done as failed, can be retried if crawl is stopped and restarted
- save state to crawls/crawl-<ts>-<id>.yaml when interrupted
- --saveState option controls when crawl state is saved, default to partial/when interrupted, also always, never.
- support in-memory or redis based crawl state, using fork of puppeteer-cluster
- --redisStore used to enable redis-based state
* signals/crawl interruption:
- crawl state set to drain/not provide any more urls to crawl
- graceful stop of crawl in response to sigint/sigterm
- initial sigint/sigterm waits for graceful end of current pages, second terminates immediately
- initial sigabrt followed by sigterm terminates immediately
- puppeteer disable handleSIGTERM, handleSIGHUP, handleSIGINT
* redis state support:
- use lua scripts for atomic move from queue -> pending, and pending -> done
- pending key expiry set to page timeout
- add numPending() and numSeen() to support better puppeteer-cluster semantics for early termination
- drainMax returns the numPending() + numSeen() to work with cluster stats
* arg improvements:
- add --crawlId param, also settable via CRAWL_ID env var, defaulting to os.hostname() (used for redis key and crawl state file)
- support setting cmdline args via env var CRAWL_ARGS
- use 'choices' in args when possible
* build update:
- switch base browser image to new webrecorder/browsertrix-browser-base, simple image with .deb files only for amd64 and arm64 builds
- use setuptools<58.0
* misc crawl/scoping rule fixes:
- scoping rules fix when external is used with scopeType
state:
- limit: ensure no urls, including initial seeds, are added past the limit
- signals: fix immediate shutdown on second signal
- tests: add scope test for default scope + excludes
*  py-wacz update
- add 'seed': true to pages that are seeds for optimized wacz creation, keeping non-seeds separate (supported via wacz 0.3.2)
- pywb: use latest pywb branch for improved twitter video capture
* update to latest browsertrix-behaviors
* fix setuptools dependency #88
* update README for 0.5.0 beta
											
										 
										
											2021-09-28 09:41:16 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("default scope + exclude", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - https://example.com/
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-13 14:34:25 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude: https://example.com/pathexclude
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("prefix");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-13 14:34:25 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/https:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-13 15:08:36 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("default scope + exclude is numeric", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude: "2022"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("prefix");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/2022/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-13 14:34:25 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("prefix scope global + exclude", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								scopeType: prefix
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude: https://example.com/pathexclude
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("prefix");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							
								
									
										
										
										
											2022-02-13 14:34:25 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/https:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("prefix scope per seed + exclude", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: prefix
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												State Save + Restore State from Config + Redis State + Scope Fix 0.5.0 (#78)
* save state work:
- support interrupting and saving crawl
- support loading crawl state (frontier queue, pending, done) from YAML
- support scope check when loading to apply new scoping rules when restarting crawl
- failed urls added to done as failed, can be retried if crawl is stopped and restarted
- save state to crawls/crawl-<ts>-<id>.yaml when interrupted
- --saveState option controls when crawl state is saved, default to partial/when interrupted, also always, never.
- support in-memory or redis based crawl state, using fork of puppeteer-cluster
- --redisStore used to enable redis-based state
* signals/crawl interruption:
- crawl state set to drain/not provide any more urls to crawl
- graceful stop of crawl in response to sigint/sigterm
- initial sigint/sigterm waits for graceful end of current pages, second terminates immediately
- initial sigabrt followed by sigterm terminates immediately
- puppeteer disable handleSIGTERM, handleSIGHUP, handleSIGINT
* redis state support:
- use lua scripts for atomic move from queue -> pending, and pending -> done
- pending key expiry set to page timeout
- add numPending() and numSeen() to support better puppeteer-cluster semantics for early termination
- drainMax returns the numPending() + numSeen() to work with cluster stats
* arg improvements:
- add --crawlId param, also settable via CRAWL_ID env var, defaulting to os.hostname() (used for redis key and crawl state file)
- support setting cmdline args via env var CRAWL_ARGS
- use 'choices' in args when possible
* build update:
- switch base browser image to new webrecorder/browsertrix-browser-base, simple image with .deb files only for amd64 and arm64 builds
- use setuptools<58.0
* misc crawl/scoping rule fixes:
- scoping rules fix when external is used with scopeType
state:
- limit: ensure no urls, including initial seeds, are added past the limit
- signals: fix immediate shutdown on second signal
- tests: add scope test for default scope + excludes
*  py-wacz update
- add 'seed': true to pages that are seeds for optimized wacz creation, keeping non-seeds separate (supported via wacz 0.3.2)
- pywb: use latest pywb branch for improved twitter video capture
* update to latest browsertrix-behaviors
* fix setuptools dependency #88
* update README for 0.5.0 beta
											
										 
										
											2021-09-28 09:41:16 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude: https://example.com/pathexclude
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("prefix");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												State Save + Restore State from Config + Redis State + Scope Fix 0.5.0 (#78)
* save state work:
- support interrupting and saving crawl
- support loading crawl state (frontier queue, pending, done) from YAML
- support scope check when loading to apply new scoping rules when restarting crawl
- failed urls added to done as failed, can be retried if crawl is stopped and restarted
- save state to crawls/crawl-<ts>-<id>.yaml when interrupted
- --saveState option controls when crawl state is saved, default to partial/when interrupted, also always, never.
- support in-memory or redis based crawl state, using fork of puppeteer-cluster
- --redisStore used to enable redis-based state
* signals/crawl interruption:
- crawl state set to drain/not provide any more urls to crawl
- graceful stop of crawl in response to sigint/sigterm
- initial sigint/sigterm waits for graceful end of current pages, second terminates immediately
- initial sigabrt followed by sigterm terminates immediately
- puppeteer disable handleSIGTERM, handleSIGHUP, handleSIGINT
* redis state support:
- use lua scripts for atomic move from queue -> pending, and pending -> done
- pending key expiry set to page timeout
- add numPending() and numSeen() to support better puppeteer-cluster semantics for early termination
- drainMax returns the numPending() + numSeen() to work with cluster stats
* arg improvements:
- add --crawlId param, also settable via CRAWL_ID env var, defaulting to os.hostname() (used for redis key and crawl state file)
- support setting cmdline args via env var CRAWL_ARGS
- use 'choices' in args when possible
* build update:
- switch base browser image to new webrecorder/browsertrix-browser-base, simple image with .deb files only for amd64 and arm64 builds
- use setuptools<58.0
* misc crawl/scoping rule fixes:
- scoping rules fix when external is used with scopeType
state:
- limit: ensure no urls, including initial seeds, are added past the limit
- signals: fix immediate shutdown on second signal
- tests: add scope test for default scope + excludes
*  py-wacz update
- add 'seed': true to pages that are seeds for optimized wacz creation, keeping non-seeds separate (supported via wacz 0.3.2)
- pywb: use latest pywb branch for improved twitter video capture
* update to latest browsertrix-behaviors
* fix setuptools dependency #88
* update README for 0.5.0 beta
											
										 
										
											2021-09-28 09:41:16 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/https:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("host scope and domain scope", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: domain
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.org/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: host
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(2);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("domain");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/([^/]+\.)*example\.com\//]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[0].include[0].exec("https://example.com/")).toEqual(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[0].include[0].exec("https://example.com/path")).toEqual(true);
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[0].include[0].exec("https://sub.example.com/path")).toEqual(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    true,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  );
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    !!seeds[0].include[0].exec("https://sub.domain.example.com/path"),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  ).toEqual(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    !!seeds[0].include[0].exec("https://notsub.domainexample.com/path"),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  ).toEqual(false);
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].scopeType).toEqual("host");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].include).toEqual([/^https?:\/\/example\.org\//]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[1].include[0].exec("https://example.org/")).toEqual(true);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[1].include[0].exec("https://example.org/path")).toEqual(true);
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(!!seeds[1].include[0].exec("https://sub.example.com/path")).toEqual(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    false,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  );
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-22 17:43:13 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("domain scope drop www.", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://www.example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: domain
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("domain");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/([^/]+\.)*example\.com\//]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("custom scope", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     include: https?://example.com/(path|other)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: https?://example.com/pathexclude
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(1);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("custom");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/https?:\/\/example.com\/(path|other)/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/https?:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("inherit scope", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								include: https?://example.com/(path|other)
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude: https://example.com/pathexclude
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(2);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("custom");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].url).toEqual("https://example.com/1");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/https?:\/\/example.com\/(path|other)/]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/https:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].scopeType).toEqual("custom");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].url).toEqual("https://example.com/2");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].include).toEqual([/https?:\/\/example.com\/(path|other)/]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].exclude).toEqual([/https:\/\/example.com\/pathexclude/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("override scope", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     include: https://example.com/(path|other)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - https://example.com/2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/subpath/file.html
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: prefix
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-23 09:43:11 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/subpath/file.html
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								include: https://example.com/onlythispath
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-23 09:43:11 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(4);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("custom");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].url).toEqual("https://example.com/1");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/https:\/\/example.com\/(path|other)/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].scopeType).toEqual("custom");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].url).toEqual("https://example.com/2");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].include).toEqual([/https:\/\/example.com\/onlythispath/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].scopeType).toEqual("prefix");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].url).toEqual("https://example.com/subpath/file.html");
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-09 19:11:11 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].include).toEqual([
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /^https?:\/\/example\.com\/subpath\//,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    /https:\/\/example.com\/onlythispath/,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  ]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-23 09:43:11 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].scopeType).toEqual("custom");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].url).toEqual("https://example.com/subpath/file.html");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].include).toEqual([/https:\/\/example.com\/onlythispath/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].exclude).toEqual([]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								test("override scope with exclude", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/1
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     scopeType: page-spa
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/subpath/file.html
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: prefix
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/2
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     scopeType: any
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/3
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     scopeType: page
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/4
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								     scopeType: page
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: ''
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								exclude:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  - /search\\?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  - q\\?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(5);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const excludeRxs = [/\/search\?/, /q\?/];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].scopeType).toEqual("page-spa");
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].url).toEqual("https://example.com/1");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\/1#.+/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual(excludeRxs);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].scopeType).toEqual("prefix");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].url).toEqual("https://example.com/subpath/file.html");
							 | 
						
					
						
							
								
									
										
										
										
											2022-03-06 14:46:14 -08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].include).toEqual([/^https?:\/\/example\.com\/subpath\//]);
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].exclude).toEqual(excludeRxs);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].scopeType).toEqual("any");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].url).toEqual("https://example.com/2");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].include).toEqual([/.*/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].exclude).toEqual(excludeRxs);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].scopeType).toEqual("page");
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].url).toEqual("https://example.com/3");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].include).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].exclude).toEqual(excludeRxs);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-08-17 20:54:18 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[4].scopeType).toEqual("page");
							 | 
						
					
						
							
								
									
										
										
										
											2021-07-06 20:22:27 -07:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[4].url).toEqual("https://example.com/4");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[4].include).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[4].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-13 15:08:36 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								test("with exclude non-string types", async () => {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  const seeds = getSeeds(`
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								seeds:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: "2023"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: 2023
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: "0"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: ""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: null
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: "null"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: false
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								   - url: https://example.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								     exclude: true
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds.length).toEqual(10);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  for (let i = 0; i < 10; i++) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    expect(seeds[i].scopeType).toEqual("prefix");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    expect(seeds[i].include).toEqual([/^https?:\/\/example\.com\//]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[0].exclude).toEqual([/2023/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[1].exclude).toEqual([/2023/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[2].exclude).toEqual([/0/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[3].exclude).toEqual([/0/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[4].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[5].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[6].exclude).toEqual([]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[7].exclude).toEqual([/null/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[8].exclude).toEqual([/false/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  expect(seeds[9].exclude).toEqual([/true/]);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								});
							 |