mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
seed urls list: check for quoted URLs and remove quotes (#883)
- check for urls that are wrapped in quotes, eg. 'https://example.com/' or "https://example.com/" and trim and remove the quotes before adding seed - tests: add quoted URL to tests, fix old.webrecorder.net test - deps: update wabac.js, RWP to latest - logging: reduce error logging for seed lists, only log once that there are duplicates or page limit is reached - fix for #882
This commit is contained in:
parent
705bc0cd9f
commit
a2742df328
8 changed files with 68 additions and 33 deletions
21
yarn.lock
21
yarn.lock
|
@ -1134,10 +1134,10 @@
|
|||
resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406"
|
||||
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==
|
||||
|
||||
"@webrecorder/wabac@^2.23.8":
|
||||
version "2.23.8"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.23.8.tgz#a3eb1e605acb706b6f043ec9e7fae9ff412ccc8a"
|
||||
integrity sha512-+ShHsaBHwFC0SPFTpMWrwJHd47MzT6o1Rg12FSfGfpycrcmrBV447+JR28NitLJIsfcIif8xAth9Vh5Z7tHWlQ==
|
||||
"@webrecorder/wabac@^2.23.11":
|
||||
version "2.23.11"
|
||||
resolved "https://registry.yarnpkg.com/@webrecorder/wabac/-/wabac-2.23.11.tgz#945da06e08b6d093b525e6e5bfd6a8f17beb995b"
|
||||
integrity sha512-rsBAkcYvgX+0HgwhgvSb3cBCBp0rVnHGQS/K5A9aJwOmfymHt0C2vInH/lmKV/5H38rJu29c2cvRX962h+lUiw==
|
||||
dependencies:
|
||||
"@peculiar/asn1-ecc" "^2.3.4"
|
||||
"@peculiar/asn1-schema" "^2.3.3"
|
||||
|
@ -1151,7 +1151,6 @@
|
|||
buffer "^6.0.3"
|
||||
fast-xml-parser "^4.4.1"
|
||||
hash-wasm "^4.9.0"
|
||||
http-link-header "^1.1.3"
|
||||
http-status-codes "^2.1.4"
|
||||
idb "^7.1.1"
|
||||
js-levenshtein "^1.1.6"
|
||||
|
@ -1162,7 +1161,7 @@
|
|||
path-parser "^6.1.0"
|
||||
process "^0.11.10"
|
||||
stream-browserify "^3.0.0"
|
||||
warcio "^2.4.3"
|
||||
warcio "^2.4.5"
|
||||
|
||||
"@webrecorder/wombat@^3.8.14":
|
||||
version "3.8.14"
|
||||
|
@ -2834,7 +2833,7 @@ html-escaper@^2.0.0:
|
|||
resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
|
||||
integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
|
||||
|
||||
http-link-header@^1.1.1, http-link-header@^1.1.3:
|
||||
http-link-header@^1.1.1:
|
||||
version "1.1.3"
|
||||
resolved "https://registry.yarnpkg.com/http-link-header/-/http-link-header-1.1.3.tgz#b367b7a0ad1cf14027953f31aa1df40bb433da2a"
|
||||
integrity sha512-3cZ0SRL8fb9MUlU3mKM61FcQvPfXx2dBrZW3Vbg5CXa8jFlK8OaEpePenLe1oEXQduhz8b0QjsqfS59QP4AJDQ==
|
||||
|
@ -5527,10 +5526,10 @@ walker@^1.0.8:
|
|||
dependencies:
|
||||
makeerror "1.0.12"
|
||||
|
||||
warcio@^2.4.0, warcio@^2.4.3, warcio@^2.4.4:
|
||||
version "2.4.4"
|
||||
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.4.tgz#6c0c030bb55c0f0b824f854fa9e6718ca25d333d"
|
||||
integrity sha512-FrWOhv1qLNhPBPGEMm24Yo+DtkipK5DxK3ckVGbOf0OJ/UqaxAhiiby74q+GW70dsJV0wF+RA1ToK6CKseTshA==
|
||||
warcio@^2.4.0, warcio@^2.4.5:
|
||||
version "2.4.5"
|
||||
resolved "https://registry.yarnpkg.com/warcio/-/warcio-2.4.5.tgz#ba39c38e433491ab9016282813b9cf6539c3d808"
|
||||
integrity sha512-b6R/aIsR4fXzrpY/Zud7LqHFi2Bt8Ov5VLOnruHQ10rk129e9d0KOCZlyRmPD6ENTcV7yze5rXvJ5WSNS8R1zw==
|
||||
dependencies:
|
||||
"@types/pako" "^1.0.7"
|
||||
"@types/stream-buffers" "^3.0.7"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue