diff --git a/package.json b/package.json index 34912838..ac4be37c 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "js-yaml": "^4.1.0", "minio": "7.0.26", "puppeteer-core": "^19.11.1", + "sharp": "^0.32.1", "sitemapper": "^3.1.2", "uuid": "8.3.2", "warcio": "^1.6.0", diff --git a/util/screenshots.js b/util/screenshots.js index 807a15f6..26f64a09 100644 --- a/util/screenshots.js +++ b/util/screenshots.js @@ -1,6 +1,7 @@ import fs from "fs"; import path from "path"; import * as warcio from "warcio"; +import sharp from "sharp"; import { logger } from "./logger.js"; @@ -15,8 +16,7 @@ export const screenshotTypes = { "thumbnail": { type: "jpeg", omitBackground: true, - fullPage: false, - quality: 75 + fullPage: false }, "fullPage": { type: "png", @@ -44,9 +44,7 @@ export class Screenshots { } const options = screenshotTypes[screenshotType]; const screenshotBuffer = await this.page.screenshot(options); - const warcRecord = await this.wrap(screenshotBuffer, screenshotType, options.type); - const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true}); - fs.appendFileSync(this.warcName, warcRecordBuffer); + await this.writeBufferToWARC(screenshotBuffer, screenshotType, options.type); logger.info(`Screenshot (type: ${screenshotType}) for ${this.url} written to ${this.warcName}`); } catch (e) { logger.error(`Taking screenshot (type: ${screenshotType}) failed for ${this.url}`, e.message); @@ -58,7 +56,26 @@ export class Screenshots { } async takeThumbnail() { - await this.take("thumbnail"); + try { + const screenshotType = "thumbnail"; + await this.browser.setViewport(this.page, {width: 1920, height: 1080}); + const options = screenshotTypes[screenshotType]; + const screenshotBuffer = await this.page.screenshot(options); + const thumbnailBuffer = await sharp(screenshotBuffer) + // 16:9 thumbnail + .resize(640, 360) + .toBuffer(); + await this.writeBufferToWARC(thumbnailBuffer, screenshotType, options.type); + logger.info(`Screenshot (type: thumbnail) for ${this.url} written to ${this.warcName}`); + } catch (e) { + logger.error(`Taking screenshot (type: thumbnail) failed for ${this.url}`, e.message); + } + } + + async writeBufferToWARC(screenshotBuffer, screenshotType, imageType) { + const warcRecord = await this.wrap(screenshotBuffer, screenshotType, imageType); + const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true}); + fs.appendFileSync(this.warcName, warcRecordBuffer); } async wrap(buffer, screenshotType="screenshot", imageType="png") { diff --git a/yarn.lock b/yarn.lock index f1369269..fa5155de 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1327,11 +1327,27 @@ color-name@1.1.3: resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25" integrity sha1-p9BVi9icQveV3UIyj3QIMcpTvCU= -color-name@~1.1.4: +color-name@^1.0.0, color-name@~1.1.4: version "1.1.4" resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== +color-string@^1.9.0: + version "1.9.1" + resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.9.1.tgz#4467f9146f036f855b764dfb5bf8582bf342c7a4" + integrity sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg== + dependencies: + color-name "^1.0.0" + simple-swizzle "^0.2.2" + +color@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/color/-/color-4.2.3.tgz#d781ecb5e57224ee43ea9627560107c0e0c6463a" + integrity sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A== + dependencies: + color-convert "^2.0.1" + color-string "^1.9.0" + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -1444,6 +1460,11 @@ dedent@^0.7.0: resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c" integrity sha512-Q6fKUPqnAHAyhiUgFU7BUzLiv0kd8saH9al7tnu5Q/okj6dnupxyTgFIBjVzJATdfIAm9NAsvXNzjaKa+bxVyA== +deep-extend@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/deep-extend/-/deep-extend-0.6.0.tgz#c4fa7c95404a17a9c3e8ca7e1537312b736330ac" + integrity sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA== + deep-is@^0.1.3: version "0.1.3" resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34" @@ -1479,6 +1500,11 @@ des.js@^1.0.0: inherits "^2.0.1" minimalistic-assert "^1.0.0" +detect-libc@^2.0.0, detect-libc@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.1.tgz#e1897aa88fa6ad197862937fbc0441ef352ee0cd" + integrity sha512-463v3ZeIrcWtdgIg6vI6XUncguvr2TnGl4SzDXinkt9mSLpBJKXT3mW6xT3VQdDN11+WVs29pgvivTc4Lp8v+w== + detect-newline@^3.0.0: version "3.1.0" resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651" @@ -1800,6 +1826,11 @@ exit@^0.1.2: resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c" integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw= +expand-template@^2.0.3: + version "2.0.3" + resolved "https://registry.yarnpkg.com/expand-template/-/expand-template-2.0.3.tgz#6e14b3fcee0f3a6340ecb57d2e8918692052a47c" + integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg== + expect@^29.2.1: version "29.2.1" resolved "https://registry.yarnpkg.com/expect/-/expect-29.2.1.tgz#25752d0df92d3daa5188dc8804de1f30759658cf" @@ -1977,6 +2008,11 @@ get-symbol-description@^1.0.0: call-bind "^1.0.2" get-intrinsic "^1.1.1" +github-from-package@0.0.0: + version "0.0.0" + resolved "https://registry.yarnpkg.com/github-from-package/-/github-from-package-0.0.0.tgz#97fb5d96bfde8973313f20e8288ef9a167fa64ce" + integrity sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw== + glob-parent@^5.0.0: version "5.1.2" resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" @@ -2177,6 +2213,11 @@ inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4: resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== +ini@~1.3.0: + version "1.3.8" + resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c" + integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew== + internal-slot@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.3.tgz#7347e307deeea2faac2ac6205d4bc7d34967f59c" @@ -2220,6 +2261,11 @@ is-arrayish@^0.2.1: resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d" integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0= +is-arrayish@^0.3.1: + version "0.3.2" + resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.3.2.tgz#4574a2ae56f7ab206896fb431eaeed066fdf8f03" + integrity sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ== + is-bigint@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.2.tgz#ffb381442503235ad245ea89e45b3dbff040ee5a" @@ -3032,6 +3078,11 @@ minimatch@^3.0.4: dependencies: brace-expansion "^1.1.7" +minimist@^1.2.0, minimist@^1.2.3: + version "1.2.8" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" + integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== + minimist@^1.2.5: version "1.2.5" resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602" @@ -3064,7 +3115,7 @@ mitt@3.0.0: resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.0.tgz#69ef9bd5c80ff6f57473e8d89326d01c414be0bd" integrity sha512-7dX2/10ITVyqh4aOSVI9gdape+t9l2/8QxHrFmUXu4EEUpdlxl6RudZUPZoc+zuY2hk1j7XxVroIVIan/pD/SQ== -mkdirp-classic@^0.5.2: +mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3: version "0.5.3" resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113" integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A== @@ -3081,11 +3132,28 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== +napi-build-utils@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806" + integrity sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg== + natural-compare@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc= +node-abi@^3.3.0: + version "3.40.0" + resolved "https://registry.yarnpkg.com/node-abi/-/node-abi-3.40.0.tgz#51d8ed44534f70ff1357dfbc3a89717b1ceac1b4" + integrity sha512-zNy02qivjjRosswoYmPi8hIKJRr8MpQyeKT6qlcq/OnOgA3Rhoae+IYOqsM9V5+JnHWmxKnWOT2GxvtqdtOCXA== + dependencies: + semver "^7.3.5" + +node-addon-api@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-6.1.0.tgz#ac8470034e58e67d0c6f1204a18ae6995d9c0d76" + integrity sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA== + node-fetch@2.6.7: version "2.6.7" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad" @@ -3343,6 +3411,24 @@ pkg-dir@^4.2.0: dependencies: find-up "^4.0.0" +prebuild-install@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/prebuild-install/-/prebuild-install-7.1.1.tgz#de97d5b34a70a0c81334fd24641f2a1702352e45" + integrity sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw== + dependencies: + detect-libc "^2.0.0" + expand-template "^2.0.3" + github-from-package "0.0.0" + minimist "^1.2.3" + mkdirp-classic "^0.5.3" + napi-build-utils "^1.0.1" + node-abi "^3.3.0" + pump "^3.0.0" + rc "^1.2.7" + simple-get "^4.0.0" + tar-fs "^2.0.0" + tunnel-agent "^0.6.0" + prelude-ls@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396" @@ -3451,6 +3537,16 @@ randomfill@^1.0.3: randombytes "^2.0.5" safe-buffer "^5.1.0" +rc@^1.2.7: + version "1.2.8" + resolved "https://registry.yarnpkg.com/rc/-/rc-1.2.8.tgz#cd924bf5200a075b83c188cd6b9e211b7fc0d3ed" + integrity sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw== + dependencies: + deep-extend "^0.6.0" + ini "~1.3.0" + minimist "^1.2.0" + strip-json-comments "~2.0.1" + react-is@^16.8.1: version "16.13.1" resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4" @@ -3629,6 +3725,13 @@ semver@^7.3.5: dependencies: lru-cache "^6.0.0" +semver@^7.5.0: + version "7.5.0" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.0.tgz#ed8c5dc8efb6c629c88b23d41dc9bf40c1d96cd0" + integrity sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA== + dependencies: + lru-cache "^6.0.0" + set-blocking@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7" @@ -3642,6 +3745,20 @@ sha.js@^2.4.0, sha.js@^2.4.8: inherits "^2.0.1" safe-buffer "^5.0.1" +sharp@^0.32.1: + version "0.32.1" + resolved "https://registry.yarnpkg.com/sharp/-/sharp-0.32.1.tgz#41aa0d0b2048b2e0ee453d9fcb14ec1f408390fe" + integrity sha512-kQTFtj7ldpUqSe8kDxoGLZc1rnMFU0AO2pqbX6pLy3b7Oj8ivJIdoKNwxHVQG2HN6XpHPJqCSM2nsma2gOXvOg== + dependencies: + color "^4.2.3" + detect-libc "^2.0.1" + node-addon-api "^6.1.0" + prebuild-install "^7.1.1" + semver "^7.5.0" + simple-get "^4.0.1" + tar-fs "^2.1.1" + tunnel-agent "^0.6.0" + shebang-command@^2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea" @@ -3668,6 +3785,27 @@ signal-exit@^3.0.3, signal-exit@^3.0.7: resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== +simple-concat@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/simple-concat/-/simple-concat-1.0.1.tgz#f46976082ba35c2263f1c8ab5edfe26c41c9552f" + integrity sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q== + +simple-get@^4.0.0, simple-get@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/simple-get/-/simple-get-4.0.1.tgz#4a39db549287c979d352112fa03fd99fd6bc3543" + integrity sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA== + dependencies: + decompress-response "^6.0.0" + once "^1.3.1" + simple-concat "^1.0.0" + +simple-swizzle@^0.2.2: + version "0.2.2" + resolved "https://registry.yarnpkg.com/simple-swizzle/-/simple-swizzle-0.2.2.tgz#a4da6b635ffcccca33f70d17cb92592de95e557a" + integrity sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg== + dependencies: + is-arrayish "^0.3.1" + sisteransi@^1.0.5: version "1.0.5" resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed" @@ -3816,6 +3954,11 @@ strip-json-comments@^3.1.0, strip-json-comments@^3.1.1: resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006" integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== +strip-json-comments@~2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a" + integrity sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ== + supports-color@^5.3.0: version "5.5.0" resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f" @@ -3854,7 +3997,7 @@ table@^6.0.4: string-width "^4.2.0" strip-ansi "^6.0.0" -tar-fs@2.1.1: +tar-fs@2.1.1, tar-fs@^2.0.0, tar-fs@^2.1.1: version "2.1.1" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== @@ -3924,6 +4067,13 @@ tr46@~0.0.3: resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a" integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== +tunnel-agent@^0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd" + integrity sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w== + dependencies: + safe-buffer "^5.0.1" + type-check@^0.4.0, type-check@~0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"