mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Add --urlFile param to specify text file with a list of URLs to crawl (#38)
* Resolves #12 * Make --url param optional. Only one of --url or --urlFile should be specified. * Add ignoreScope option to queueUrls() to support adding specific URLs * add tests for urlFile * bump version to 0.3.2 Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local>
This commit is contained in:
parent
2db7bc98b1
commit
63376ab6ac
10 changed files with 4334 additions and 3633 deletions
|
@ -1,5 +1,9 @@
|
|||
## CHANGES
|
||||
|
||||
v0.3.2
|
||||
- Added a `--urlFile` option: Allows users to specify a .txt file containing a list of exact URLs to crawl (one URL per line).
|
||||
|
||||
|
||||
v0.3.1
|
||||
- Improved shutdown wait: Instead of waiting for 5 secs, wait until all pending requests are written to WARCs
|
||||
- Bug fix: Use async APIs for combine WARC to avoid spurious issues with multiple crawls
|
||||
|
|
|
@ -52,7 +52,10 @@ Options:
|
|||
--help Show help [boolean]
|
||||
--version Show version number [boolean]
|
||||
-u, --url The URL to start crawling from
|
||||
[string] [required]
|
||||
[string]
|
||||
--urlFile, --urlfile, --url-file, If set, read a list of urls from the
|
||||
--url-list passed file INSTEAD of the url from
|
||||
the --url flag. [string]
|
||||
-w, --workers The number of workers to run in
|
||||
parallel [number] [default: 1]
|
||||
--newContext The context for each new capture,
|
||||
|
|
58
crawler.js
58
crawler.js
|
@ -154,7 +154,6 @@ class Crawler {
|
|||
alias: "u",
|
||||
describe: "The URL to start crawling from",
|
||||
type: "string",
|
||||
demandOption: true,
|
||||
},
|
||||
|
||||
"workers": {
|
||||
|
@ -246,7 +245,13 @@ class Crawler {
|
|||
type: "string",
|
||||
default: "stats",
|
||||
},
|
||||
|
||||
|
||||
"urlFile": {
|
||||
alias: ["urlfile", "url-file", "url-list"],
|
||||
describe: "If set, read a list of urls from the passed file INSTEAD of the url from the --url flag.",
|
||||
type: "string",
|
||||
},
|
||||
|
||||
"text": {
|
||||
describe: "If set, extract text to the pages.jsonl file",
|
||||
type: "boolean",
|
||||
|
@ -312,10 +317,15 @@ class Crawler {
|
|||
//argv.seeds = [Crawler.validateUserUrl(argv.url)];
|
||||
argv.url = this.validateUserUrl(argv.url);
|
||||
}
|
||||
|
||||
if (!argv.scope) {
|
||||
|
||||
if (argv.url && argv.urlFile) {
|
||||
console.log("You've passed a urlFile param, only urls listed in that file will be processed. If you also passed a url to the --url flag that will be ignored.");
|
||||
}
|
||||
|
||||
if (!argv.scope && argv.url && !argv.urlFile) {
|
||||
//argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
|
||||
argv.scope = [new RegExp("^" + this.rxEscape(argv.url.slice(0, argv.url.lastIndexOf("/") + 1)))];
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -550,11 +560,18 @@ class Crawler {
|
|||
});
|
||||
|
||||
this.cluster.task((opts) => this.crawlPage(opts));
|
||||
|
||||
|
||||
await this.initPages();
|
||||
|
||||
this.queueUrl(this.params.url);
|
||||
|
||||
|
||||
if (this.params.urlFile) {
|
||||
const urlSeedFile = await fsp.readFile(path.join(__dirname, this.params.urlFile), "utf8");
|
||||
const urlSeedFileList = urlSeedFile.split("\n");
|
||||
this.queueUrls(urlSeedFileList, true);
|
||||
}
|
||||
|
||||
if (!this.params.urlFile) {
|
||||
this.queueUrl(this.params.url);
|
||||
}
|
||||
if (this.params.useSitemap) {
|
||||
await this.parseSitemap(this.params.useSitemap);
|
||||
}
|
||||
|
@ -633,10 +650,10 @@ class Crawler {
|
|||
this.queueUrls(results);
|
||||
}
|
||||
|
||||
queueUrls(urls) {
|
||||
queueUrls(urls, ignoreScope=false) {
|
||||
try {
|
||||
for (const url of urls) {
|
||||
const captureUrl = this.shouldCrawl(url);
|
||||
const captureUrl = this.shouldCrawl(url, ignoreScope);
|
||||
if (captureUrl) {
|
||||
if (!this.queueUrl(captureUrl)) {
|
||||
break;
|
||||
|
@ -707,7 +724,7 @@ class Crawler {
|
|||
}
|
||||
}
|
||||
|
||||
shouldCrawl(url) {
|
||||
shouldCrawl(url, ignoreScope) {
|
||||
try {
|
||||
url = new URL(url);
|
||||
} catch(e) {
|
||||
|
@ -728,17 +745,22 @@ class Crawler {
|
|||
if (this.seenList.has(url)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
let inScope = false;
|
||||
|
||||
if (ignoreScope){
|
||||
inScope = true;
|
||||
}
|
||||
|
||||
// check scopes
|
||||
for (const s of this.params.scope) {
|
||||
if (s.exec(url)) {
|
||||
inScope = true;
|
||||
break;
|
||||
if (!ignoreScope){
|
||||
for (const s of this.params.scope) {
|
||||
if (s.exec(url)) {
|
||||
inScope = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (!inScope) {
|
||||
//console.log(`Not in scope ${url} ${scope}`);
|
||||
return false;
|
||||
|
@ -835,9 +857,7 @@ class Crawler {
|
|||
|
||||
try {
|
||||
const { sites } = await sitemapper.fetch();
|
||||
|
||||
this.queueUrls(sites);
|
||||
|
||||
} catch(e) {
|
||||
console.log(e);
|
||||
}
|
||||
|
|
|
@ -2,12 +2,13 @@ version: '3.5'
|
|||
|
||||
services:
|
||||
crawler:
|
||||
image: webrecorder/browsertrix-crawler:0.3.1
|
||||
image: webrecorder/browsertrix-crawler:0.3.2
|
||||
build:
|
||||
context: ./
|
||||
|
||||
volumes:
|
||||
- ./crawls:/crawls
|
||||
- ./tests/fixtures:/fixtures
|
||||
|
||||
cap_add:
|
||||
- NET_ADMIN
|
||||
|
|
580
package-lock.json
generated
580
package-lock.json
generated
|
@ -1,14 +1,16 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "0.2.1-beta.0",
|
||||
"version": "0.3.2",
|
||||
"lockfileVersion": 2,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"version": "0.2.1-beta.0",
|
||||
"version": "0.3.2",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"abort-controller": "^3.0.0",
|
||||
"browsertrix-behaviors": "^0.2.1",
|
||||
"ioredis": "^4.27.1",
|
||||
"node-fetch": "^2.6.1",
|
||||
"puppeteer-cluster": "^0.22.0",
|
||||
"puppeteer-core": "^5.3.1",
|
||||
|
@ -20,7 +22,8 @@
|
|||
"eslint": "^7.20.0",
|
||||
"eslint-plugin-react": "^7.22.0",
|
||||
"jest": "^26.6.3",
|
||||
"md5": "^2.3.0"
|
||||
"md5": "^2.3.0",
|
||||
"warcio": "^1.4.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
|
@ -771,7 +774,6 @@
|
|||
"jest-resolve": "^26.6.2",
|
||||
"jest-util": "^26.6.2",
|
||||
"jest-worker": "^26.6.2",
|
||||
"node-notifier": "^8.0.0",
|
||||
"slash": "^3.0.0",
|
||||
"source-map": "^0.6.0",
|
||||
"string-length": "^4.0.1",
|
||||
|
@ -872,6 +874,46 @@
|
|||
"node": ">= 10.14.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@peculiar/asn1-schema": {
|
||||
"version": "2.0.32",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/asn1-schema/-/asn1-schema-2.0.32.tgz",
|
||||
"integrity": "sha512-JzGUVxOFN+RKslJrGAxcq4l6tEmmLY1XuALHINVxc8BJsB4bXOdZzTvxbN9dCPk65Vbulno0B6DmImZ7I6SO8w==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@types/asn1js": "^2.0.0",
|
||||
"asn1js": "^2.1.1",
|
||||
"pvtsutils": "^1.1.2",
|
||||
"tslib": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@peculiar/json-schema": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/json-schema/-/json-schema-1.1.12.tgz",
|
||||
"integrity": "sha512-coUfuoMeIB7B8/NMekxaDzLhaYmp0HZNPEjYRm9goRou8UZIC3z21s0sL9AWoCw4EG876QyO3kYrc61WNF9B/w==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"tslib": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@peculiar/webcrypto": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/webcrypto/-/webcrypto-1.1.7.tgz",
|
||||
"integrity": "sha512-aCNLYdHZkvGH+T8/YBOY33jrVGVuLIa3bpizeHXqwN+P4ZtixhA+kxEEWM1amZwUY2nY/iuj+5jdZn/zB7EPPQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@peculiar/asn1-schema": "^2.0.32",
|
||||
"@peculiar/json-schema": "^1.1.12",
|
||||
"pvtsutils": "^1.1.6",
|
||||
"tslib": "^2.2.0",
|
||||
"webcrypto-core": "^1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.12.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@sindresorhus/is": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-4.0.0.tgz",
|
||||
|
@ -912,6 +954,12 @@
|
|||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/asn1js": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/asn1js/-/asn1js-2.0.0.tgz",
|
||||
"integrity": "sha512-Jjzp5EqU0hNpADctc/UqhiFbY1y2MqIxBVa2S4dBlbnZHTLPMuggoL5q43X63LpsOIINRDirBjP56DUUKIUWIA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/babel__core": {
|
||||
"version": "7.1.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.1.12.tgz",
|
||||
|
@ -1308,6 +1356,18 @@
|
|||
"safer-buffer": "~2.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/asn1js": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/asn1js/-/asn1js-2.1.1.tgz",
|
||||
"integrity": "sha512-t9u0dU0rJN4ML+uxgN6VM2Z4H5jWIYm0w8LsZLzMJaQsgL3IJNbxHgmbWDvJAwspyHpDFuzUaUFh4c05UB4+6g==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"pvutils": "latest"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/assert-plus": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
|
||||
|
@ -1569,6 +1629,11 @@
|
|||
"url": "https://opencollective.com/browserslist"
|
||||
}
|
||||
},
|
||||
"node_modules/browsertrix-behaviors": {
|
||||
"version": "0.2.1",
|
||||
"resolved": "https://registry.npmjs.org/browsertrix-behaviors/-/browsertrix-behaviors-0.2.1.tgz",
|
||||
"integrity": "sha512-NkReAj+PMS91oewA0tpsyvQVFspsy8mhKLXT8sg33pSDGYCDVlezqPt2agKmsmG1Y2m39yOzt31L8A3v4WcJDw=="
|
||||
},
|
||||
"node_modules/bser": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz",
|
||||
|
@ -1868,6 +1933,14 @@
|
|||
"mimic-response": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/cluster-key-slot": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz",
|
||||
"integrity": "sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw==",
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/co": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
||||
|
@ -2046,10 +2119,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.2.0.tgz",
|
||||
"integrity": "sha512-IX2ncY78vDTjZMFUdmsvIRFY2Cf4FnD0wRs+nQwJU8Lu99/tPFdb0VybiiMTPe3I6rQmwsqQqRBvxU+bZ/I8sg==",
|
||||
"deprecated": "Debug versions >=3.2.0 <3.2.7 || >=4 <4.3.1 have a low-severity ReDos regression when used in a Node.js environment. It is recommended you upgrade to 3.2.7 or 4.3.1. (https://github.com/visionmedia/debug/issues/797)",
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz",
|
||||
"integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==",
|
||||
"dependencies": {
|
||||
"ms": "2.1.2"
|
||||
},
|
||||
|
@ -2168,6 +2240,14 @@
|
|||
"node": ">=0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/denque": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/denque/-/denque-1.5.0.tgz",
|
||||
"integrity": "sha512-CYiCSgIF1p6EUByQPlGkKnP1M9g0ZV3qMIrqMqZqdwazygIA/YP2vrbcyl1h/WppKJTdl1F85cXIle+394iDAQ==",
|
||||
"engines": {
|
||||
"node": ">=0.10"
|
||||
}
|
||||
},
|
||||
"node_modules/detect-newline": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz",
|
||||
|
@ -2358,8 +2438,7 @@
|
|||
"esprima": "^4.0.1",
|
||||
"estraverse": "^4.2.0",
|
||||
"esutils": "^2.0.2",
|
||||
"optionator": "^0.8.1",
|
||||
"source-map": "~0.6.1"
|
||||
"optionator": "^0.8.1"
|
||||
},
|
||||
"bin": {
|
||||
"escodegen": "bin/escodegen.js",
|
||||
|
@ -2560,6 +2639,15 @@
|
|||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/esm": {
|
||||
"version": "3.2.25",
|
||||
"resolved": "https://registry.npmjs.org/esm/-/esm-3.2.25.tgz",
|
||||
"integrity": "sha512-U1suiZ2oDVWv4zPO56S0NcR5QriEahGtdN2OR6FiOG4WJvcjBVFB0qI4+eKoWFH483PKGuLuu6V8Z4T5g63UVA==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/espree": {
|
||||
"version": "7.3.1",
|
||||
"resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz",
|
||||
|
@ -2932,7 +3020,6 @@
|
|||
"resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz",
|
||||
"integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==",
|
||||
"dependencies": {
|
||||
"@types/yauzl": "^2.9.1",
|
||||
"debug": "^4.1.1",
|
||||
"get-stream": "^5.1.0",
|
||||
"yauzl": "^2.10.0"
|
||||
|
@ -3400,6 +3487,12 @@
|
|||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/hi-base32": {
|
||||
"version": "0.5.1",
|
||||
"resolved": "https://registry.npmjs.org/hi-base32/-/hi-base32-0.5.1.tgz",
|
||||
"integrity": "sha512-EmBBpvdYh/4XxsnUybsPag6VikPYnN30td+vQk+GI3qpahVEG9+gTkG0aXVxTjBqQ5T6ijbWIu77O+C5WFWsnA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/hosted-git-info": {
|
||||
"version": "2.8.8",
|
||||
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.8.tgz",
|
||||
|
@ -3572,6 +3665,30 @@
|
|||
"node": ">= 0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/ioredis": {
|
||||
"version": "4.27.2",
|
||||
"resolved": "https://registry.npmjs.org/ioredis/-/ioredis-4.27.2.tgz",
|
||||
"integrity": "sha512-7OpYymIthonkC2Jne5uGWXswdhlua1S1rWGAERaotn0hGJWTSURvxdHA9G6wNbT/qKCloCja/FHsfKXW8lpTmg==",
|
||||
"dependencies": {
|
||||
"cluster-key-slot": "^1.1.0",
|
||||
"debug": "^4.3.1",
|
||||
"denque": "^1.1.0",
|
||||
"lodash.defaults": "^4.2.0",
|
||||
"lodash.flatten": "^4.4.0",
|
||||
"p-map": "^2.1.0",
|
||||
"redis-commands": "1.7.0",
|
||||
"redis-errors": "^1.2.0",
|
||||
"redis-parser": "^3.0.0",
|
||||
"standard-as-callback": "^2.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/ioredis"
|
||||
}
|
||||
},
|
||||
"node_modules/ip-regex": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz",
|
||||
|
@ -4224,7 +4341,6 @@
|
|||
"@types/node": "*",
|
||||
"anymatch": "^3.0.3",
|
||||
"fb-watchman": "^2.0.0",
|
||||
"fsevents": "^2.1.2",
|
||||
"graceful-fs": "^4.2.4",
|
||||
"jest-regex-util": "^26.0.0",
|
||||
"jest-serializer": "^26.6.2",
|
||||
|
@ -4877,6 +4993,16 @@
|
|||
"integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
"integrity": "sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw="
|
||||
},
|
||||
"node_modules/lodash.flatten": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
|
||||
"integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8="
|
||||
},
|
||||
"node_modules/lodash.sortby": {
|
||||
"version": "4.7.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz",
|
||||
|
@ -5507,6 +5633,14 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/p-map": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-map/-/p-map-2.1.0.tgz",
|
||||
"integrity": "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw==",
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/p-try": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
|
||||
|
@ -5515,6 +5649,12 @@
|
|||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/pako": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
|
||||
"integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/parent-module": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
|
||||
|
@ -5798,6 +5938,24 @@
|
|||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.799653.tgz",
|
||||
"integrity": "sha512-t1CcaZbvm8pOlikqrsIM9GOa7Ipp07+4h/q9u0JXBWjPCjHdBl9KkddX87Vv9vBHoBGtwV79sYQNGnQM6iS5gg=="
|
||||
},
|
||||
"node_modules/pvtsutils": {
|
||||
"version": "1.1.6",
|
||||
"resolved": "https://registry.npmjs.org/pvtsutils/-/pvtsutils-1.1.6.tgz",
|
||||
"integrity": "sha512-Tm/74+LIqWtItcZHBJztPEPqLzNKbtPAA3LoFt763PFCHxmCfrF4YXhdFEiPAxMTakR0shbVymKKyMxg1Zqt4A==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"tslib": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/pvutils": {
|
||||
"version": "1.0.17",
|
||||
"resolved": "https://registry.npmjs.org/pvutils/-/pvutils-1.0.17.tgz",
|
||||
"integrity": "sha512-wLHYUQxWaXVQvKnwIDWFVKDJku9XDCvyhhxoq8dc5MFdIlRenyPI9eSfEtcvgHgD7FlvCyGAlWgOzRnZD99GZQ==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/qs": {
|
||||
"version": "6.5.2",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
|
||||
|
@ -5878,6 +6036,30 @@
|
|||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/redis-commands": {
|
||||
"version": "1.7.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.7.0.tgz",
|
||||
"integrity": "sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ=="
|
||||
},
|
||||
"node_modules/redis-errors": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
|
||||
"integrity": "sha1-62LSrbFeTq9GEMBK/hUpOEJQq60=",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/redis-parser": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-3.0.0.tgz",
|
||||
"integrity": "sha1-tm2CjNyv5rS4pCin3vTGvKwxyLQ=",
|
||||
"dependencies": {
|
||||
"redis-errors": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/regex-not": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/regex-not/-/regex-not-1.0.2.tgz",
|
||||
|
@ -6973,6 +7155,11 @@
|
|||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/standard-as-callback": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/standard-as-callback/-/standard-as-callback-2.1.0.tgz",
|
||||
"integrity": "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A=="
|
||||
},
|
||||
"node_modules/static-extend": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz",
|
||||
|
@ -7439,6 +7626,12 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/tslib": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz",
|
||||
"integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/tunnel-agent": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
||||
|
@ -7615,6 +7808,12 @@
|
|||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/uuid-random": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/uuid-random/-/uuid-random-1.3.2.tgz",
|
||||
"integrity": "sha512-UOzej0Le/UgkbWEO8flm+0y+G+ljUon1QWTEZOq1rnMAsxo2+SckbiZdKzAHHlVh6gJqI1TjC/xwgR50MuCrBQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/v8-compile-cache": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.2.0.tgz",
|
||||
|
@ -7698,6 +7897,106 @@
|
|||
"makeerror": "1.0.x"
|
||||
}
|
||||
},
|
||||
"node_modules/warcio": {
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/warcio/-/warcio-1.4.5.tgz",
|
||||
"integrity": "sha512-VwFBdmEQhWHmxsdyiLM0INHD1KZ2+EGYzslZXFe6JdbuTfSF/dYRQ/wEdvp+m28mydphROF6D32KfkIMRU1NZw==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@peculiar/webcrypto": "^1.1.1",
|
||||
"esm": "^3.2.25",
|
||||
"hi-base32": "^0.5.0",
|
||||
"node-fetch": "^2.6.0",
|
||||
"pako": "^1.0.11",
|
||||
"uuid-random": "^1.3.0",
|
||||
"yargs": "^15.3.1"
|
||||
},
|
||||
"bin": {
|
||||
"warcio.js": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/warcio/node_modules/cliui": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
|
||||
"integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.0",
|
||||
"wrap-ansi": "^6.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/warcio/node_modules/wrap-ansi": {
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
|
||||
"integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/warcio/node_modules/y18n": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.3.tgz",
|
||||
"integrity": "sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/warcio/node_modules/yargs": {
|
||||
"version": "15.4.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-15.4.1.tgz",
|
||||
"integrity": "sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"cliui": "^6.0.0",
|
||||
"decamelize": "^1.2.0",
|
||||
"find-up": "^4.1.0",
|
||||
"get-caller-file": "^2.0.1",
|
||||
"require-directory": "^2.1.1",
|
||||
"require-main-filename": "^2.0.0",
|
||||
"set-blocking": "^2.0.0",
|
||||
"string-width": "^4.2.0",
|
||||
"which-module": "^2.0.0",
|
||||
"y18n": "^4.0.0",
|
||||
"yargs-parser": "^18.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/warcio/node_modules/yargs-parser": {
|
||||
"version": "18.1.3",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
|
||||
"integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"camelcase": "^5.0.0",
|
||||
"decamelize": "^1.2.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6"
|
||||
}
|
||||
},
|
||||
"node_modules/webcrypto-core": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/webcrypto-core/-/webcrypto-core-1.2.0.tgz",
|
||||
"integrity": "sha512-p76Z/YLuE4CHCRdc49FB/ETaM4bzM3roqWNJeGs+QNY1fOTzKTOVnhmudW1fuO+5EZg6/4LG9NJ6gaAyxTk9XQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"@peculiar/asn1-schema": "^2.0.27",
|
||||
"@peculiar/json-schema": "^1.1.12",
|
||||
"asn1js": "^2.0.26",
|
||||
"pvtsutils": "^1.1.2",
|
||||
"tslib": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-6.1.0.tgz",
|
||||
|
@ -8630,6 +8929,40 @@
|
|||
"chalk": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"@peculiar/asn1-schema": {
|
||||
"version": "2.0.32",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/asn1-schema/-/asn1-schema-2.0.32.tgz",
|
||||
"integrity": "sha512-JzGUVxOFN+RKslJrGAxcq4l6tEmmLY1XuALHINVxc8BJsB4bXOdZzTvxbN9dCPk65Vbulno0B6DmImZ7I6SO8w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@types/asn1js": "^2.0.0",
|
||||
"asn1js": "^2.1.1",
|
||||
"pvtsutils": "^1.1.2",
|
||||
"tslib": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"@peculiar/json-schema": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/json-schema/-/json-schema-1.1.12.tgz",
|
||||
"integrity": "sha512-coUfuoMeIB7B8/NMekxaDzLhaYmp0HZNPEjYRm9goRou8UZIC3z21s0sL9AWoCw4EG876QyO3kYrc61WNF9B/w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"tslib": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"@peculiar/webcrypto": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@peculiar/webcrypto/-/webcrypto-1.1.7.tgz",
|
||||
"integrity": "sha512-aCNLYdHZkvGH+T8/YBOY33jrVGVuLIa3bpizeHXqwN+P4ZtixhA+kxEEWM1amZwUY2nY/iuj+5jdZn/zB7EPPQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@peculiar/asn1-schema": "^2.0.32",
|
||||
"@peculiar/json-schema": "^1.1.12",
|
||||
"pvtsutils": "^1.1.6",
|
||||
"tslib": "^2.2.0",
|
||||
"webcrypto-core": "^1.2.0"
|
||||
}
|
||||
},
|
||||
"@sindresorhus/is": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-4.0.0.tgz",
|
||||
|
@ -8661,6 +8994,12 @@
|
|||
"defer-to-connect": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"@types/asn1js": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/asn1js/-/asn1js-2.0.0.tgz",
|
||||
"integrity": "sha512-Jjzp5EqU0hNpADctc/UqhiFbY1y2MqIxBVa2S4dBlbnZHTLPMuggoL5q43X63LpsOIINRDirBjP56DUUKIUWIA==",
|
||||
"dev": true
|
||||
},
|
||||
"@types/babel__core": {
|
||||
"version": "7.1.12",
|
||||
"resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.1.12.tgz",
|
||||
|
@ -8987,6 +9326,15 @@
|
|||
"safer-buffer": "~2.1.0"
|
||||
}
|
||||
},
|
||||
"asn1js": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/asn1js/-/asn1js-2.1.1.tgz",
|
||||
"integrity": "sha512-t9u0dU0rJN4ML+uxgN6VM2Z4H5jWIYm0w8LsZLzMJaQsgL3IJNbxHgmbWDvJAwspyHpDFuzUaUFh4c05UB4+6g==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"pvutils": "latest"
|
||||
}
|
||||
},
|
||||
"assert-plus": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/assert-plus/-/assert-plus-1.0.0.tgz",
|
||||
|
@ -9192,6 +9540,11 @@
|
|||
"node-releases": "^1.1.70"
|
||||
}
|
||||
},
|
||||
"browsertrix-behaviors": {
|
||||
"version": "0.2.1",
|
||||
"resolved": "https://registry.npmjs.org/browsertrix-behaviors/-/browsertrix-behaviors-0.2.1.tgz",
|
||||
"integrity": "sha512-NkReAj+PMS91oewA0tpsyvQVFspsy8mhKLXT8sg33pSDGYCDVlezqPt2agKmsmG1Y2m39yOzt31L8A3v4WcJDw=="
|
||||
},
|
||||
"bser": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/bser/-/bser-2.1.1.tgz",
|
||||
|
@ -9437,6 +9790,11 @@
|
|||
"mimic-response": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"cluster-key-slot": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz",
|
||||
"integrity": "sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw=="
|
||||
},
|
||||
"co": {
|
||||
"version": "4.6.0",
|
||||
"resolved": "https://registry.npmjs.org/co/-/co-4.6.0.tgz",
|
||||
|
@ -9588,9 +9946,9 @@
|
|||
}
|
||||
},
|
||||
"debug": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.2.0.tgz",
|
||||
"integrity": "sha512-IX2ncY78vDTjZMFUdmsvIRFY2Cf4FnD0wRs+nQwJU8Lu99/tPFdb0VybiiMTPe3I6rQmwsqQqRBvxU+bZ/I8sg==",
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.3.1.tgz",
|
||||
"integrity": "sha512-doEwdvm4PCeK4K3RQN2ZC2BYUBaxwLARCqZmMjtF8a51J2Rb0xpVloFRnCODwqjpwnAoao4pelN8l3RJdv3gRQ==",
|
||||
"requires": {
|
||||
"ms": "2.1.2"
|
||||
}
|
||||
|
@ -9670,6 +10028,11 @@
|
|||
"integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=",
|
||||
"dev": true
|
||||
},
|
||||
"denque": {
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/denque/-/denque-1.5.0.tgz",
|
||||
"integrity": "sha512-CYiCSgIF1p6EUByQPlGkKnP1M9g0ZV3qMIrqMqZqdwazygIA/YP2vrbcyl1h/WppKJTdl1F85cXIle+394iDAQ=="
|
||||
},
|
||||
"detect-newline": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/detect-newline/-/detect-newline-3.1.0.tgz",
|
||||
|
@ -9973,6 +10336,12 @@
|
|||
"integrity": "sha512-QudtT6av5WXels9WjIM7qz1XD1cWGvX4gGXvp/zBn9nXG02D0utdU3Em2m/QjTnrsk6bBjmCygl3rmj118msQQ==",
|
||||
"dev": true
|
||||
},
|
||||
"esm": {
|
||||
"version": "3.2.25",
|
||||
"resolved": "https://registry.npmjs.org/esm/-/esm-3.2.25.tgz",
|
||||
"integrity": "sha512-U1suiZ2oDVWv4zPO56S0NcR5QriEahGtdN2OR6FiOG4WJvcjBVFB0qI4+eKoWFH483PKGuLuu6V8Z4T5g63UVA==",
|
||||
"dev": true
|
||||
},
|
||||
"espree": {
|
||||
"version": "7.3.1",
|
||||
"resolved": "https://registry.npmjs.org/espree/-/espree-7.3.1.tgz",
|
||||
|
@ -10621,6 +10990,12 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"hi-base32": {
|
||||
"version": "0.5.1",
|
||||
"resolved": "https://registry.npmjs.org/hi-base32/-/hi-base32-0.5.1.tgz",
|
||||
"integrity": "sha512-EmBBpvdYh/4XxsnUybsPag6VikPYnN30td+vQk+GI3qpahVEG9+gTkG0aXVxTjBqQ5T6ijbWIu77O+C5WFWsnA==",
|
||||
"dev": true
|
||||
},
|
||||
"hosted-git-info": {
|
||||
"version": "2.8.8",
|
||||
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.8.tgz",
|
||||
|
@ -10753,6 +11128,23 @@
|
|||
"side-channel": "^1.0.4"
|
||||
}
|
||||
},
|
||||
"ioredis": {
|
||||
"version": "4.27.2",
|
||||
"resolved": "https://registry.npmjs.org/ioredis/-/ioredis-4.27.2.tgz",
|
||||
"integrity": "sha512-7OpYymIthonkC2Jne5uGWXswdhlua1S1rWGAERaotn0hGJWTSURvxdHA9G6wNbT/qKCloCja/FHsfKXW8lpTmg==",
|
||||
"requires": {
|
||||
"cluster-key-slot": "^1.1.0",
|
||||
"debug": "^4.3.1",
|
||||
"denque": "^1.1.0",
|
||||
"lodash.defaults": "^4.2.0",
|
||||
"lodash.flatten": "^4.4.0",
|
||||
"p-map": "^2.1.0",
|
||||
"redis-commands": "1.7.0",
|
||||
"redis-errors": "^1.2.0",
|
||||
"redis-parser": "^3.0.0",
|
||||
"standard-as-callback": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"ip-regex": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/ip-regex/-/ip-regex-2.1.0.tgz",
|
||||
|
@ -11770,6 +12162,16 @@
|
|||
"integrity": "sha512-PlhdFcillOINfeV7Ni6oF1TAEayyZBoZ8bcshTHqOYJYlrqzRK5hagpagky5o4HfCzzd1TRkXPMFq6cKk9rGmA==",
|
||||
"dev": true
|
||||
},
|
||||
"lodash.defaults": {
|
||||
"version": "4.2.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.defaults/-/lodash.defaults-4.2.0.tgz",
|
||||
"integrity": "sha1-0JF4cW/+pN3p5ft7N/bwgCJ0WAw="
|
||||
},
|
||||
"lodash.flatten": {
|
||||
"version": "4.4.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.flatten/-/lodash.flatten-4.4.0.tgz",
|
||||
"integrity": "sha1-8xwiIlqWMtK7+OSt2+8kCqdlph8="
|
||||
},
|
||||
"lodash.sortby": {
|
||||
"version": "4.7.0",
|
||||
"resolved": "https://registry.npmjs.org/lodash.sortby/-/lodash.sortby-4.7.0.tgz",
|
||||
|
@ -12255,11 +12657,22 @@
|
|||
"p-limit": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"p-map": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/p-map/-/p-map-2.1.0.tgz",
|
||||
"integrity": "sha512-y3b8Kpd8OAN444hxfBbFfj1FY/RjtTd8tzYwhUqNYXx0fXx2iX4maP4Qr6qhIKbQXI02wTLAda4fYUbDagTUFw=="
|
||||
},
|
||||
"p-try": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
|
||||
"integrity": "sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ=="
|
||||
},
|
||||
"pako": {
|
||||
"version": "1.0.11",
|
||||
"resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz",
|
||||
"integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==",
|
||||
"dev": true
|
||||
},
|
||||
"parent-module": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
|
||||
|
@ -12486,6 +12899,21 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"pvtsutils": {
|
||||
"version": "1.1.6",
|
||||
"resolved": "https://registry.npmjs.org/pvtsutils/-/pvtsutils-1.1.6.tgz",
|
||||
"integrity": "sha512-Tm/74+LIqWtItcZHBJztPEPqLzNKbtPAA3LoFt763PFCHxmCfrF4YXhdFEiPAxMTakR0shbVymKKyMxg1Zqt4A==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"tslib": "^2.2.0"
|
||||
}
|
||||
},
|
||||
"pvutils": {
|
||||
"version": "1.0.17",
|
||||
"resolved": "https://registry.npmjs.org/pvutils/-/pvutils-1.0.17.tgz",
|
||||
"integrity": "sha512-wLHYUQxWaXVQvKnwIDWFVKDJku9XDCvyhhxoq8dc5MFdIlRenyPI9eSfEtcvgHgD7FlvCyGAlWgOzRnZD99GZQ==",
|
||||
"dev": true
|
||||
},
|
||||
"qs": {
|
||||
"version": "6.5.2",
|
||||
"resolved": "https://registry.npmjs.org/qs/-/qs-6.5.2.tgz",
|
||||
|
@ -12544,6 +12972,24 @@
|
|||
"util-deprecate": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"redis-commands": {
|
||||
"version": "1.7.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-commands/-/redis-commands-1.7.0.tgz",
|
||||
"integrity": "sha512-nJWqw3bTFy21hX/CPKHth6sfhZbdiHP6bTawSgQBlKOVRG7EZkfHbbHwQJnrE4vsQf0CMNE+3gJ4Fmm16vdVlQ=="
|
||||
},
|
||||
"redis-errors": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-errors/-/redis-errors-1.2.0.tgz",
|
||||
"integrity": "sha1-62LSrbFeTq9GEMBK/hUpOEJQq60="
|
||||
},
|
||||
"redis-parser": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/redis-parser/-/redis-parser-3.0.0.tgz",
|
||||
"integrity": "sha1-tm2CjNyv5rS4pCin3vTGvKwxyLQ=",
|
||||
"requires": {
|
||||
"redis-errors": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"regex-not": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/regex-not/-/regex-not-1.0.2.tgz",
|
||||
|
@ -13406,6 +13852,11 @@
|
|||
"escape-string-regexp": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"standard-as-callback": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/standard-as-callback/-/standard-as-callback-2.1.0.tgz",
|
||||
"integrity": "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A=="
|
||||
},
|
||||
"static-extend": {
|
||||
"version": "0.1.2",
|
||||
"resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz",
|
||||
|
@ -13776,6 +14227,12 @@
|
|||
"punycode": "^2.1.1"
|
||||
}
|
||||
},
|
||||
"tslib": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.2.0.tgz",
|
||||
"integrity": "sha512-gS9GVHRU+RGn5KQM2rllAlR3dU6m7AcpJKdtH8gFvQiC4Otgk98XnmMU+nZenHt/+VhnBPWwgrJsyrdcw6i23w==",
|
||||
"dev": true
|
||||
},
|
||||
"tunnel-agent": {
|
||||
"version": "0.6.0",
|
||||
"resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
|
||||
|
@ -13921,6 +14378,12 @@
|
|||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz",
|
||||
"integrity": "sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg=="
|
||||
},
|
||||
"uuid-random": {
|
||||
"version": "1.3.2",
|
||||
"resolved": "https://registry.npmjs.org/uuid-random/-/uuid-random-1.3.2.tgz",
|
||||
"integrity": "sha512-UOzej0Le/UgkbWEO8flm+0y+G+ljUon1QWTEZOq1rnMAsxo2+SckbiZdKzAHHlVh6gJqI1TjC/xwgR50MuCrBQ==",
|
||||
"dev": true
|
||||
},
|
||||
"v8-compile-cache": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/v8-compile-cache/-/v8-compile-cache-2.2.0.tgz",
|
||||
|
@ -13994,6 +14457,93 @@
|
|||
"makeerror": "1.0.x"
|
||||
}
|
||||
},
|
||||
"warcio": {
|
||||
"version": "1.4.5",
|
||||
"resolved": "https://registry.npmjs.org/warcio/-/warcio-1.4.5.tgz",
|
||||
"integrity": "sha512-VwFBdmEQhWHmxsdyiLM0INHD1KZ2+EGYzslZXFe6JdbuTfSF/dYRQ/wEdvp+m28mydphROF6D32KfkIMRU1NZw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@peculiar/webcrypto": "^1.1.1",
|
||||
"esm": "^3.2.25",
|
||||
"hi-base32": "^0.5.0",
|
||||
"node-fetch": "^2.6.0",
|
||||
"pako": "^1.0.11",
|
||||
"uuid-random": "^1.3.0",
|
||||
"yargs": "^15.3.1"
|
||||
},
|
||||
"dependencies": {
|
||||
"cliui": {
|
||||
"version": "6.0.0",
|
||||
"resolved": "https://registry.npmjs.org/cliui/-/cliui-6.0.0.tgz",
|
||||
"integrity": "sha512-t6wbgtoCXvAzst7QgXxJYqPt0usEfbgQdftEPbLL/cvv6HPE5VgvqCuAIDR0NgU52ds6rFwqrgakNLrHEjCbrQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"string-width": "^4.2.0",
|
||||
"strip-ansi": "^6.0.0",
|
||||
"wrap-ansi": "^6.2.0"
|
||||
}
|
||||
},
|
||||
"wrap-ansi": {
|
||||
"version": "6.2.0",
|
||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz",
|
||||
"integrity": "sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"ansi-styles": "^4.0.0",
|
||||
"string-width": "^4.1.0",
|
||||
"strip-ansi": "^6.0.0"
|
||||
}
|
||||
},
|
||||
"y18n": {
|
||||
"version": "4.0.3",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.3.tgz",
|
||||
"integrity": "sha512-JKhqTOwSrqNA1NY5lSztJ1GrBiUodLMmIZuLiDaMRJ+itFd+ABVE8XBjOvIWL+rSqNDC74LCSFmlb/U4UZ4hJQ==",
|
||||
"dev": true
|
||||
},
|
||||
"yargs": {
|
||||
"version": "15.4.1",
|
||||
"resolved": "https://registry.npmjs.org/yargs/-/yargs-15.4.1.tgz",
|
||||
"integrity": "sha512-aePbxDmcYW++PaqBsJ+HYUFwCdv4LVvdnhBy78E57PIor8/OVvhMrADFFEDh8DHDFRv/O9i3lPhsENjO7QX0+A==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"cliui": "^6.0.0",
|
||||
"decamelize": "^1.2.0",
|
||||
"find-up": "^4.1.0",
|
||||
"get-caller-file": "^2.0.1",
|
||||
"require-directory": "^2.1.1",
|
||||
"require-main-filename": "^2.0.0",
|
||||
"set-blocking": "^2.0.0",
|
||||
"string-width": "^4.2.0",
|
||||
"which-module": "^2.0.0",
|
||||
"y18n": "^4.0.0",
|
||||
"yargs-parser": "^18.1.2"
|
||||
}
|
||||
},
|
||||
"yargs-parser": {
|
||||
"version": "18.1.3",
|
||||
"resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-18.1.3.tgz",
|
||||
"integrity": "sha512-o50j0JeToy/4K6OZcaQmW6lyXXKhq7csREXcDwk2omFPJEwUNOVtJKvmDr9EI1fAJZUyZcRF7kxGBWmRXudrCQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"camelcase": "^5.0.0",
|
||||
"decamelize": "^1.2.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"webcrypto-core": {
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/webcrypto-core/-/webcrypto-core-1.2.0.tgz",
|
||||
"integrity": "sha512-p76Z/YLuE4CHCRdc49FB/ETaM4bzM3roqWNJeGs+QNY1fOTzKTOVnhmudW1fuO+5EZg6/4LG9NJ6gaAyxTk9XQ==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"@peculiar/asn1-schema": "^2.0.27",
|
||||
"@peculiar/json-schema": "^1.1.12",
|
||||
"asn1js": "^2.0.26",
|
||||
"pvtsutils": "^1.1.2",
|
||||
"tslib": "^2.1.0"
|
||||
}
|
||||
},
|
||||
"webidl-conversions": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-6.1.0.tgz",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "0.3.1",
|
||||
"version": "0.3.2",
|
||||
"main": "browsertrix-crawler",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
"author": "Ilya Kreymer <ikreymer@gmail.com>, Webrecorder Software",
|
||||
|
|
2
tests/fixtures/urlSeedFile.txt
vendored
Normal file
2
tests/fixtures/urlSeedFile.txt
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
https://www.example.org
|
||||
https://www.example.com
|
34
tests/url_file_list.test.js
Normal file
34
tests/url_file_list.test.js
Normal file
|
@ -0,0 +1,34 @@
|
|||
const util = require("util");
const exec = util.promisify(require("child_process").exec);
const fs = require("fs");

/**
 * End-to-end check for the --urlFile option: runs the crawler (through
 * docker-compose) against the seed list in tests/fixtures/urlSeedFile.txt and
 * verifies that every URL listed there shows up in the generated pages.jsonl.
 */
test("check that all urls in a file list are crawled when the filelisturl param is passed", async () => {
  // NOTE(review): called from inside the test body, this may not extend the
  // timeout of the test that is already running -- confirm against the Jest
  // version/runner in use, or pass the timeout as the third argument to test().
  jest.setTimeout(30000);

  try {
    await exec("docker-compose run -v $PWD/tests/fixtures:/app/tests/fixtures crawler crawl --collection filelisttest --urlFile tests/fixtures/urlSeedFile.txt --timeout 10000");
  }
  catch (error) {
    // Best effort: a failed crawl command is only logged here. The assertions
    // below (and the pages.jsonl read) decide whether the test passes.
    console.log(error);
  }

  const crawledPages = fs.readFileSync("crawls/collections/filelisttest/pages/pages.jsonl", "utf8");

  // One URL per line. Trim each line and drop blanks so a trailing newline (or
  // CRLF line endings) never produces an empty "URL" that would trivially
  // match, and so that no real URL gets skipped.
  const seedUrls = fs.readFileSync("tests/fixtures/urlSeedFile.txt", "utf8")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line !== "");

  // Guard against a vacuous pass when the fixture is empty.
  expect(seedUrls.length).toBeGreaterThan(0);

  // Check every seed (the list is not assumed to start with a blank entry).
  // Reporting the missing URLs makes a failure self-explanatory.
  const missingSeedUrls = seedUrls.filter((url) => !crawledPages.includes(url));

  expect(missingSeedUrls).toEqual([]);
});
|
2
urlSeedFile.txt
Normal file
2
urlSeedFile.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
https://www.example.com/
|
||||
https://www.example.org
|
Loading…
Add table
Add a link
Reference in a new issue