Improve thumbnails with sharp (#304)

* Resize thumbnails to 640x360 with sharp
This commit is contained in:
Tessa Walsh 2023-05-19 14:30:24 -04:00 committed by GitHub
parent b5df5ad3c1
commit cc606deba9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 177 additions and 9 deletions

View file

@ -18,6 +18,7 @@
"js-yaml": "^4.1.0",
"minio": "7.0.26",
"puppeteer-core": "^19.11.1",
"sharp": "^0.32.1",
"sitemapper": "^3.1.2",
"uuid": "8.3.2",
"warcio": "^1.6.0",

View file

@ -1,6 +1,7 @@
import fs from "fs";
import path from "path";
import * as warcio from "warcio";
import sharp from "sharp";
import { logger } from "./logger.js";
@ -15,8 +16,7 @@ export const screenshotTypes = {
"thumbnail": {
type: "jpeg",
omitBackground: true,
fullPage: false,
quality: 75
fullPage: false
},
"fullPage": {
type: "png",
@ -44,9 +44,7 @@ export class Screenshots {
}
const options = screenshotTypes[screenshotType];
const screenshotBuffer = await this.page.screenshot(options);
const warcRecord = await this.wrap(screenshotBuffer, screenshotType, options.type);
const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true});
fs.appendFileSync(this.warcName, warcRecordBuffer);
await this.writeBufferToWARC(screenshotBuffer, screenshotType, options.type);
logger.info(`Screenshot (type: ${screenshotType}) for ${this.url} written to ${this.warcName}`);
} catch (e) {
logger.error(`Taking screenshot (type: ${screenshotType}) failed for ${this.url}`, e.message);
@ -58,7 +56,26 @@ export class Screenshots {
}
async takeThumbnail() {
await this.take("thumbnail");
try {
const screenshotType = "thumbnail";
await this.browser.setViewport(this.page, {width: 1920, height: 1080});
const options = screenshotTypes[screenshotType];
const screenshotBuffer = await this.page.screenshot(options);
const thumbnailBuffer = await sharp(screenshotBuffer)
// 16:9 thumbnail
.resize(640, 360)
.toBuffer();
await this.writeBufferToWARC(thumbnailBuffer, screenshotType, options.type);
logger.info(`Screenshot (type: thumbnail) for ${this.url} written to ${this.warcName}`);
} catch (e) {
logger.error(`Taking screenshot (type: thumbnail) failed for ${this.url}`, e.message);
}
}
async writeBufferToWARC(screenshotBuffer, screenshotType, imageType) {
const warcRecord = await this.wrap(screenshotBuffer, screenshotType, imageType);
const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true});
fs.appendFileSync(this.warcName, warcRecordBuffer);
}
async wrap(buffer, screenshotType="screenshot", imageType="png") {

156
yarn.lock
View file

@ -1327,11 +1327,27 @@ color-name@1.1.3:
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.3.tgz#a7d0558bd89c42f795dd42328f740831ca53bc25"
integrity sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=
color-name@~1.1.4:
color-name@^1.0.0, color-name@~1.1.4:
version "1.1.4"
resolved "https://registry.yarnpkg.com/color-name/-/color-name-1.1.4.tgz#c2a09a87acbde69543de6f63fa3995c826c536a2"
integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==
color-string@^1.9.0:
version "1.9.1"
resolved "https://registry.yarnpkg.com/color-string/-/color-string-1.9.1.tgz#4467f9146f036f855b764dfb5bf8582bf342c7a4"
integrity sha512-shrVawQFojnZv6xM40anx4CkoDP+fZsw/ZerEMsW/pyzsRbElpsL/DBVW7q3ExxwusdNXI3lXpuhEZkzs8p5Eg==
dependencies:
color-name "^1.0.0"
simple-swizzle "^0.2.2"
color@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/color/-/color-4.2.3.tgz#d781ecb5e57224ee43ea9627560107c0e0c6463a"
integrity sha512-1rXeuUUiGGrykh+CeBdu5Ie7OJwinCgQY0bc7GCRxy5xVHy+moaqkpL/jqQq0MtQOeYcrqEz4abc5f0KtU7W4A==
dependencies:
color-convert "^2.0.1"
color-string "^1.9.0"
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
@ -1444,6 +1460,11 @@ dedent@^0.7.0:
resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c"
integrity sha512-Q6fKUPqnAHAyhiUgFU7BUzLiv0kd8saH9al7tnu5Q/okj6dnupxyTgFIBjVzJATdfIAm9NAsvXNzjaKa+bxVyA==
deep-extend@^0.6.0:
version "0.6.0"
resolved "https://registry.yarnpkg.com/deep-extend/-/deep-extend-0.6.0.tgz#c4fa7c95404a17a9c3e8ca7e1537312b736330ac"
integrity sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==
deep-is@^0.1.3:
version "0.1.3"
resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.3.tgz#b369d6fb5dbc13eecf524f91b070feedc357cf34"
@ -1479,6 +1500,11 @@ des.js@^1.0.0:
inherits "^2.0.1"
minimalistic-assert "^1.0.0"
detect-libc@^2.0.0, detect-libc@^2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.1.tgz#e1897aa88fa6ad197862937fbc0441ef352ee0cd"
integrity sha512-463v3ZeIrcWtdgIg6vI6XUncguvr2TnGl4SzDXinkt9mSLpBJKXT3mW6xT3VQdDN11+WVs29pgvivTc4Lp8v+w==
detect-newline@^3.0.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
@ -1800,6 +1826,11 @@ exit@^0.1.2:
resolved "https://registry.yarnpkg.com/exit/-/exit-0.1.2.tgz#0632638f8d877cc82107d30a0fff1a17cba1cd0c"
integrity sha1-BjJjj42HfMghB9MKD/8aF8uhzQw=
expand-template@^2.0.3:
version "2.0.3"
resolved "https://registry.yarnpkg.com/expand-template/-/expand-template-2.0.3.tgz#6e14b3fcee0f3a6340ecb57d2e8918692052a47c"
integrity sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==
expect@^29.2.1:
version "29.2.1"
resolved "https://registry.yarnpkg.com/expect/-/expect-29.2.1.tgz#25752d0df92d3daa5188dc8804de1f30759658cf"
@ -1977,6 +2008,11 @@ get-symbol-description@^1.0.0:
call-bind "^1.0.2"
get-intrinsic "^1.1.1"
github-from-package@0.0.0:
version "0.0.0"
resolved "https://registry.yarnpkg.com/github-from-package/-/github-from-package-0.0.0.tgz#97fb5d96bfde8973313f20e8288ef9a167fa64ce"
integrity sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==
glob-parent@^5.0.0:
version "5.1.2"
resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4"
@ -2177,6 +2213,11 @@ inherits@2, inherits@^2.0.1, inherits@^2.0.3, inherits@^2.0.4:
resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c"
integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==
ini@~1.3.0:
version "1.3.8"
resolved "https://registry.yarnpkg.com/ini/-/ini-1.3.8.tgz#a29da425b48806f34767a4efce397269af28432c"
integrity sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==
internal-slot@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.3.tgz#7347e307deeea2faac2ac6205d4bc7d34967f59c"
@ -2220,6 +2261,11 @@ is-arrayish@^0.2.1:
resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.2.1.tgz#77c99840527aa8ecb1a8ba697b80645a7a926a9d"
integrity sha1-d8mYQFJ6qOyxqLppe4BkWnqSap0=
is-arrayish@^0.3.1:
version "0.3.2"
resolved "https://registry.yarnpkg.com/is-arrayish/-/is-arrayish-0.3.2.tgz#4574a2ae56f7ab206896fb431eaeed066fdf8f03"
integrity sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==
is-bigint@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.2.tgz#ffb381442503235ad245ea89e45b3dbff040ee5a"
@ -3032,6 +3078,11 @@ minimatch@^3.0.4:
dependencies:
brace-expansion "^1.1.7"
minimist@^1.2.0, minimist@^1.2.3:
version "1.2.8"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c"
integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==
minimist@^1.2.5:
version "1.2.5"
resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.5.tgz#67d66014b66a6a8aaa0c083c5fd58df4e4e97602"
@ -3064,7 +3115,7 @@ mitt@3.0.0:
resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.0.tgz#69ef9bd5c80ff6f57473e8d89326d01c414be0bd"
integrity sha512-7dX2/10ITVyqh4aOSVI9gdape+t9l2/8QxHrFmUXu4EEUpdlxl6RudZUPZoc+zuY2hk1j7XxVroIVIan/pD/SQ==
mkdirp-classic@^0.5.2:
mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3:
version "0.5.3"
resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113"
integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==
@ -3081,11 +3132,28 @@ ms@2.1.2:
resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009"
integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==
napi-build-utils@^1.0.1:
version "1.0.2"
resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806"
integrity sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg==
natural-compare@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
integrity sha1-Sr6/7tdUHywnrPspvbvRXI1bpPc=
node-abi@^3.3.0:
version "3.40.0"
resolved "https://registry.yarnpkg.com/node-abi/-/node-abi-3.40.0.tgz#51d8ed44534f70ff1357dfbc3a89717b1ceac1b4"
integrity sha512-zNy02qivjjRosswoYmPi8hIKJRr8MpQyeKT6qlcq/OnOgA3Rhoae+IYOqsM9V5+JnHWmxKnWOT2GxvtqdtOCXA==
dependencies:
semver "^7.3.5"
node-addon-api@^6.1.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-6.1.0.tgz#ac8470034e58e67d0c6f1204a18ae6995d9c0d76"
integrity sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==
node-fetch@2.6.7:
version "2.6.7"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad"
@ -3343,6 +3411,24 @@ pkg-dir@^4.2.0:
dependencies:
find-up "^4.0.0"
prebuild-install@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/prebuild-install/-/prebuild-install-7.1.1.tgz#de97d5b34a70a0c81334fd24641f2a1702352e45"
integrity sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==
dependencies:
detect-libc "^2.0.0"
expand-template "^2.0.3"
github-from-package "0.0.0"
minimist "^1.2.3"
mkdirp-classic "^0.5.3"
napi-build-utils "^1.0.1"
node-abi "^3.3.0"
pump "^3.0.0"
rc "^1.2.7"
simple-get "^4.0.0"
tar-fs "^2.0.0"
tunnel-agent "^0.6.0"
prelude-ls@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396"
@ -3451,6 +3537,16 @@ randomfill@^1.0.3:
randombytes "^2.0.5"
safe-buffer "^5.1.0"
rc@^1.2.7:
version "1.2.8"
resolved "https://registry.yarnpkg.com/rc/-/rc-1.2.8.tgz#cd924bf5200a075b83c188cd6b9e211b7fc0d3ed"
integrity sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==
dependencies:
deep-extend "^0.6.0"
ini "~1.3.0"
minimist "^1.2.0"
strip-json-comments "~2.0.1"
react-is@^16.8.1:
version "16.13.1"
resolved "https://registry.yarnpkg.com/react-is/-/react-is-16.13.1.tgz#789729a4dc36de2999dc156dd6c1d9c18cea56a4"
@ -3629,6 +3725,13 @@ semver@^7.3.5:
dependencies:
lru-cache "^6.0.0"
semver@^7.5.0:
version "7.5.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-7.5.0.tgz#ed8c5dc8efb6c629c88b23d41dc9bf40c1d96cd0"
integrity sha512-+XC0AD/R7Q2mPSRuy2Id0+CGTZ98+8f+KvwirxOKIEyid+XSx6HbC63p+O4IndTHuX5Z+JxQ0TghCkO5Cg/2HA==
dependencies:
lru-cache "^6.0.0"
set-blocking@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
@ -3642,6 +3745,20 @@ sha.js@^2.4.0, sha.js@^2.4.8:
inherits "^2.0.1"
safe-buffer "^5.0.1"
sharp@^0.32.1:
version "0.32.1"
resolved "https://registry.yarnpkg.com/sharp/-/sharp-0.32.1.tgz#41aa0d0b2048b2e0ee453d9fcb14ec1f408390fe"
integrity sha512-kQTFtj7ldpUqSe8kDxoGLZc1rnMFU0AO2pqbX6pLy3b7Oj8ivJIdoKNwxHVQG2HN6XpHPJqCSM2nsma2gOXvOg==
dependencies:
color "^4.2.3"
detect-libc "^2.0.1"
node-addon-api "^6.1.0"
prebuild-install "^7.1.1"
semver "^7.5.0"
simple-get "^4.0.1"
tar-fs "^2.1.1"
tunnel-agent "^0.6.0"
shebang-command@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/shebang-command/-/shebang-command-2.0.0.tgz#ccd0af4f8835fbdc265b82461aaf0c36663f34ea"
@ -3668,6 +3785,27 @@ signal-exit@^3.0.3, signal-exit@^3.0.7:
resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9"
integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==
simple-concat@^1.0.0:
version "1.0.1"
resolved "https://registry.yarnpkg.com/simple-concat/-/simple-concat-1.0.1.tgz#f46976082ba35c2263f1c8ab5edfe26c41c9552f"
integrity sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==
simple-get@^4.0.0, simple-get@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/simple-get/-/simple-get-4.0.1.tgz#4a39db549287c979d352112fa03fd99fd6bc3543"
integrity sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==
dependencies:
decompress-response "^6.0.0"
once "^1.3.1"
simple-concat "^1.0.0"
simple-swizzle@^0.2.2:
version "0.2.2"
resolved "https://registry.yarnpkg.com/simple-swizzle/-/simple-swizzle-0.2.2.tgz#a4da6b635ffcccca33f70d17cb92592de95e557a"
integrity sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==
dependencies:
is-arrayish "^0.3.1"
sisteransi@^1.0.5:
version "1.0.5"
resolved "https://registry.yarnpkg.com/sisteransi/-/sisteransi-1.0.5.tgz#134d681297756437cc05ca01370d3a7a571075ed"
@ -3816,6 +3954,11 @@ strip-json-comments@^3.1.0, strip-json-comments@^3.1.1:
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006"
integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==
strip-json-comments@~2.0.1:
version "2.0.1"
resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-2.0.1.tgz#3c531942e908c2697c0ec344858c286c7ca0a60a"
integrity sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==
supports-color@^5.3.0:
version "5.5.0"
resolved "https://registry.yarnpkg.com/supports-color/-/supports-color-5.5.0.tgz#e2e69a44ac8772f78a1ec0b35b689df6530efc8f"
@ -3854,7 +3997,7 @@ table@^6.0.4:
string-width "^4.2.0"
strip-ansi "^6.0.0"
tar-fs@2.1.1:
tar-fs@2.1.1, tar-fs@^2.0.0, tar-fs@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==
@ -3924,6 +4067,13 @@ tr46@~0.0.3:
resolved "https://registry.yarnpkg.com/tr46/-/tr46-0.0.3.tgz#8184fd347dac9cdc185992f3a6622e14b9d9ab6a"
integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==
tunnel-agent@^0.6.0:
version "0.6.0"
resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
integrity sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==
dependencies:
safe-buffer "^5.0.1"
type-check@^0.4.0, type-check@~0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1"