From 540c355d25fd106b0026de340b381ac72c82c868 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:18:00 -0500 Subject: [PATCH 1/2] Bump sharp from 0.32.1 to 0.32.6 (#443) Bumps [sharp](https://github.com/lovell/sharp) from 0.32.1 to 0.32.6 to fix vulnerability --- package.json | 2 +- yarn.lock | 57 ++++++++++++++++++++++++---------------------------- 2 files changed, 27 insertions(+), 32 deletions(-) diff --git a/package.json b/package.json index 6190c2be..3e41e5ac 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "js-yaml": "^4.1.0", "minio": "7.0.26", "puppeteer-core": "^20.7.4", - "sharp": "^0.32.1", + "sharp": "^0.32.6", "sitemapper": "^3.2.5", "uuid": "8.3.2", "warcio": "^1.6.0", diff --git a/yarn.lock b/yarn.lock index bb4023a8..ed361bdc 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1542,10 +1542,10 @@ des.js@^1.0.0: inherits "^2.0.1" minimalistic-assert "^1.0.0" -detect-libc@^2.0.0, detect-libc@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.1.tgz#e1897aa88fa6ad197862937fbc0441ef352ee0cd" - integrity sha512-463v3ZeIrcWtdgIg6vI6XUncguvr2TnGl4SzDXinkt9mSLpBJKXT3mW6xT3VQdDN11+WVs29pgvivTc4Lp8v+w== +detect-libc@^2.0.0, detect-libc@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.2.tgz#8ccf2ba9315350e1241b88d0ac3b0e1fbd99605d" + integrity sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw== detect-newline@^3.0.0: version "3.1.0" @@ -3900,24 +3900,10 @@ semver@^6.0.0, semver@^6.3.0: resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.0.tgz#ee0a64c8af5e8ceea67687b133761e1becbd1d3d" integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw== -semver@^7.2.1: - version "7.3.5" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.5.tgz#0b621c879348d8998e4b0e4be94b3f12e6018ef7" - integrity sha512-PoeGJYh8HK4BTO/a9Tf6ZG3veo/A7ZVsYrSA6J8ny9nb3B1VrpkuN+z9OE5wfE5p6H4LchYZsegiQgbJD94ZFQ== - dependencies: - lru-cache "^6.0.0" - -semver@^7.3.5: - version "7.3.8" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.3.8.tgz#07a78feafb3f7b32347d725e33de7e2a2df67798" - integrity sha512-NB1ctGL5rlHrPJtFDVIVzTyQylMLu9N9VICA6HSFJo8MCGVTMW6gfpicwKmmK/dAjTOrqu5l63JJOpDSrAis3A== - dependencies: - lru-cache "^6.0.0" - -semver@^7.5.0: - version "7.5.0" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.0.tgz#ed8c5dc8efb6c629c88b23d41dc9bf40c1d96cd0" - integrity sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA== +semver@^7.2.1, semver@^7.3.5, semver@^7.5.4: + version "7.5.4" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.4.tgz#483986ec4ed38e1c6c48c34894a9182dbff68a6e" + integrity sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA== dependencies: lru-cache "^6.0.0" @@ -3934,18 +3920,18 @@ sha.js@^2.4.0, sha.js@^2.4.8: inherits "^2.0.1" safe-buffer "^5.0.1" -sharp@^0.32.1: - version "0.32.1" - resolved "https://registry.yarnpkg.com/sharp/-/sharp-0.32.1.tgz#41aa0d0b2048b2e0ee453d9fcb14ec1f408390fe" - integrity sha512-kQTFtj7ldpUqSe8kDxoGLZc1rnMFU0AO2pqbX6pLy3b7Oj8ivJIdoKNwxHVQG2HN6XpHPJqCSM2nsma2gOXvOg== +sharp@^0.32.6: + version "0.32.6" + resolved "https://registry.yarnpkg.com/sharp/-/sharp-0.32.6.tgz#6ad30c0b7cd910df65d5f355f774aa4fce45732a" + integrity sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w== dependencies: color "^4.2.3" - detect-libc "^2.0.1" + detect-libc "^2.0.2" node-addon-api "^6.1.0" prebuild-install "^7.1.1" - semver "^7.5.0" + semver "^7.5.4" simple-get "^4.0.1" - tar-fs "^2.1.1" + tar-fs "^3.0.4" tunnel-agent "^0.6.0" shebang-command@^2.0.0: @@ -4227,7 +4213,7 @@ tar-fs@3.0.3: pump "^3.0.0" tar-stream "^3.1.0" -tar-fs@^2.0.0, tar-fs@^2.1.1: +tar-fs@^2.0.0: version "2.1.1" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== @@ -4237,6 +4223,15 @@ tar-fs@^2.0.0, tar-fs@^2.1.1: pump "^3.0.0" tar-stream "^2.1.4" +tar-fs@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf" + integrity sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w== + dependencies: + mkdirp-classic "^0.5.2" + pump "^3.0.0" + tar-stream "^3.1.5" + tar-stream@^2.1.4: version "2.2.0" resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287" @@ -4248,7 +4243,7 @@ tar-stream@^2.1.4: inherits "^2.0.3" readable-stream "^3.1.1" -tar-stream@^3.1.0: +tar-stream@^3.1.0, tar-stream@^3.1.5: version "3.1.6" resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-3.1.6.tgz#6520607b55a06f4a2e2e04db360ba7d338cc5bab" integrity sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg== From c3b98e5047ea219336883b0b1969da425fc43456 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 16 Nov 2023 13:20:09 -0800 Subject: [PATCH 2/2] Add timeout to final awaitPendingClear() (#442) Ensure the final pending wait also has a timeout, set to max page timeout x num workers. Could also set higher, but needs to have a timeout, eg. in case of downloading live stream that never terminates. Fixes #348 in the 0.12.x line. Also bumps version to 0.12.3 --- crawler.js | 10 ++++++++-- package.json | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/crawler.js b/crawler.js index 7d8df7a4..406cc951 100644 --- a/crawler.js +++ b/crawler.js @@ -898,8 +898,14 @@ self.__bx_behaviors.selectMainBehavior(); await this.writeStats(); - // extra wait for all resources to land into WARCs - await this.awaitPendingClear(); + // extra wait with timeout for all resources to land into WARCs + await timedRun( + this.awaitPendingClear(), + this.maxPageTime * this.params.workers, + "Waiting for pending resources timed out", + {timeout: this.maxPageTime * this.params.workers}, + "general" + ); // if crawl has been stopped, mark as final exit for post-crawl tasks if (await this.crawlState.isCrawlStopped()) { diff --git a/package.json b/package.json index 3e41e5ac..5f65d12e 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-crawler", - "version": "0.12.2", + "version": "0.12.3", "main": "browsertrix-crawler", "type": "module", "repository": "https://github.com/webrecorder/browsertrix-crawler",