mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
args parsing: fix parseRx() for inclusions/exclusions to deal with non-string types (fixes #352) (#353)
Treat non-regexes as strings and pass them to the RegExp constructor. Tests: add additional scope parsing tests for different types passed in as exclusions. Update yargs. Bump version to 0.10.4.
This commit is contained in:
parent
16751de147
commit
5ba6c33bff
4 changed files with 91 additions and 32 deletions
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "0.10.3",
|
||||
"version": "0.10.4",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
@ -23,7 +23,7 @@
|
|||
"uuid": "8.3.2",
|
||||
"warcio": "^1.6.0",
|
||||
"ws": "^7.4.4",
|
||||
"yargs": "^16.0.3"
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint": "^7.20.0",
|
||||
|
|
|
@ -49,6 +49,26 @@ exclude: https://example.com/pathexclude
|
|||
});
|
||||
|
||||
|
||||
test("default scope + exclude is numeric", async () => {
|
||||
const seeds = getSeeds(`
|
||||
seeds:
|
||||
- https://example.com/
|
||||
|
||||
exclude: "2022"
|
||||
|
||||
`);
|
||||
|
||||
|
||||
expect(seeds.length).toEqual(1);
|
||||
expect(seeds[0].scopeType).toEqual("prefix");
|
||||
expect(seeds[0].include).toEqual([/^https?:\/\/example\.com\//]);
|
||||
expect(seeds[0].exclude).toEqual([/2022/]);
|
||||
|
||||
});
|
||||
|
||||
|
||||
|
||||
|
||||
test("prefix scope global + exclude", async () => {
|
||||
const seeds = getSeeds(`
|
||||
seeds:
|
||||
|
@ -271,3 +291,56 @@ exclude:
|
|||
|
||||
});
|
||||
|
||||
|
||||
test("with exclude non-string types", async () => {
|
||||
const seeds = getSeeds(`
|
||||
seeds:
|
||||
- url: https://example.com/
|
||||
exclude: "2023"
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: 2023
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: "0"
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: 0
|
||||
|
||||
- url: https://example.com/
|
||||
exclude:
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: ""
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: null
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: "null"
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: false
|
||||
|
||||
- url: https://example.com/
|
||||
exclude: true
|
||||
`);
|
||||
|
||||
expect(seeds.length).toEqual(10);
|
||||
for (let i = 0; i < 10; i++) {
|
||||
expect(seeds[i].scopeType).toEqual("prefix");
|
||||
expect(seeds[i].include).toEqual([/^https?:\/\/example\.com\//]);
|
||||
}
|
||||
|
||||
expect(seeds[0].exclude).toEqual([/2023/]);
|
||||
expect(seeds[1].exclude).toEqual([/2023/]);
|
||||
expect(seeds[2].exclude).toEqual([/0/]);
|
||||
expect(seeds[3].exclude).toEqual([/0/]);
|
||||
expect(seeds[4].exclude).toEqual([]);
|
||||
expect(seeds[5].exclude).toEqual([]);
|
||||
expect(seeds[6].exclude).toEqual([]);
|
||||
expect(seeds[7].exclude).toEqual([/null/]);
|
||||
expect(seeds[8].exclude).toEqual([/false/]);
|
||||
expect(seeds[9].exclude).toEqual([/true/]);
|
||||
|
||||
});
|
||||
|
|
|
@ -30,12 +30,12 @@ export class ScopedSeed
|
|||
}
|
||||
|
||||
parseRx(value) {
|
||||
if (!value) {
|
||||
if (value === null || value === undefined || value === "") {
|
||||
return [];
|
||||
} else if (typeof(value) === "string") {
|
||||
} else if (!(value instanceof Array)) {
|
||||
return [new RegExp(value)];
|
||||
} else {
|
||||
return value.map(e => typeof(e) === "string" ? new RegExp(e) : e);
|
||||
return value.map(e => (e instanceof RegExp) ? e : new RegExp(e));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
40
yarn.lock
40
yarn.lock
|
@ -1294,15 +1294,6 @@ cliui@^6.0.0:
|
|||
strip-ansi "^6.0.0"
|
||||
wrap-ansi "^6.2.0"
|
||||
|
||||
cliui@^7.0.2:
|
||||
version "7.0.4"
|
||||
resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f"
|
||||
integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ==
|
||||
dependencies:
|
||||
string-width "^4.2.0"
|
||||
strip-ansi "^6.0.0"
|
||||
wrap-ansi "^7.0.0"
|
||||
|
||||
cliui@^8.0.1:
|
||||
version "8.0.1"
|
||||
resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa"
|
||||
|
@ -4592,11 +4583,6 @@ yargs-parser@^18.1.2:
|
|||
camelcase "^5.0.0"
|
||||
decamelize "^1.2.0"
|
||||
|
||||
yargs-parser@^20.2.2:
|
||||
version "20.2.9"
|
||||
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-20.2.9.tgz#2eb7dc3b0289718fc295f362753845c41a0c94ee"
|
||||
integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w==
|
||||
|
||||
yargs-parser@^21.0.0, yargs-parser@^21.1.1:
|
||||
version "21.1.1"
|
||||
resolved "https://registry.yarnpkg.com/yargs-parser/-/yargs-parser-21.1.1.tgz#9096bceebf990d21bb31fa9516e0ede294a77d35"
|
||||
|
@ -4632,19 +4618,6 @@ yargs@^15.3.1:
|
|||
y18n "^4.0.0"
|
||||
yargs-parser "^18.1.2"
|
||||
|
||||
yargs@^16.0.3:
|
||||
version "16.2.0"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-16.2.0.tgz#1c82bf0f6b6a66eafce7ef30e376f49a12477f66"
|
||||
integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw==
|
||||
dependencies:
|
||||
cliui "^7.0.2"
|
||||
escalade "^3.1.1"
|
||||
get-caller-file "^2.0.5"
|
||||
require-directory "^2.1.1"
|
||||
string-width "^4.2.0"
|
||||
y18n "^5.0.5"
|
||||
yargs-parser "^20.2.2"
|
||||
|
||||
yargs@^17.3.1:
|
||||
version "17.6.0"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.6.0.tgz#e134900fc1f218bc230192bdec06a0a5f973e46c"
|
||||
|
@ -4658,6 +4631,19 @@ yargs@^17.3.1:
|
|||
y18n "^5.0.5"
|
||||
yargs-parser "^21.0.0"
|
||||
|
||||
yargs@^17.7.2:
|
||||
version "17.7.2"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269"
|
||||
integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==
|
||||
dependencies:
|
||||
cliui "^8.0.1"
|
||||
escalade "^3.1.1"
|
||||
get-caller-file "^2.0.5"
|
||||
require-directory "^2.1.1"
|
||||
string-width "^4.2.3"
|
||||
y18n "^5.0.5"
|
||||
yargs-parser "^21.1.1"
|
||||
|
||||
yauzl@^2.10.0:
|
||||
version "2.10.0"
|
||||
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue