mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
seed urls list: check for quoted URLs and remove quotes (#883)
- check for URLs that are wrapped in quotes, e.g. 'https://example.com/' or "https://example.com/", and trim and remove the quotes before adding the seed
- tests: add quoted URL to tests, fix old.webrecorder.net test
- deps: update wabac.js, RWP to latest
- logging: reduce error logging for seed lists, only log once that there are duplicates or that the page limit is reached
- fix for #882
This commit is contained in:
parent
705bc0cd9f
commit
a2742df328
8 changed files with 68 additions and 33 deletions
|
@ -18,7 +18,7 @@
|
|||
"dependencies": {
|
||||
"@novnc/novnc": "1.4.0",
|
||||
"@puppeteer/replay": "^3.1.1",
|
||||
"@webrecorder/wabac": "^2.23.8",
|
||||
"@webrecorder/wabac": "^2.23.11",
|
||||
"browsertrix-behaviors": "^0.9.2",
|
||||
"client-zip": "^2.4.5",
|
||||
"css-selector-parser": "^3.0.5",
|
||||
|
@ -39,7 +39,7 @@
|
|||
"tsc": "^2.0.4",
|
||||
"undici": "^6.18.2",
|
||||
"uuid": "8.3.2",
|
||||
"warcio": "^2.4.4",
|
||||
"warcio": "^2.4.5",
|
||||
"ws": "^7.4.4",
|
||||
"yargs": "^17.7.2"
|
||||
},
|
||||
|
@ -71,7 +71,7 @@
|
|||
},
|
||||
"resolutions": {
|
||||
"wrap-ansi": "7.0.0",
|
||||
"warcio": "^2.4.4",
|
||||
"warcio": "^2.4.5",
|
||||
"@novnc/novnc": "1.4.0"
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue