browsertrix-crawler/util/screenshots.js
Tessa Walsh cc606deba9
Improve thumbnails with sharp (#304)
* Resize thumbnails to 640x360 with sharp
2023-05-19 11:30:24 -07:00

96 lines
3 KiB
JavaScript

import fs from "fs";
import path from "path";
import * as warcio from "warcio";
import sharp from "sharp";
import { logger } from "./logger.js";
// ============================================================================
// Capture options for each supported screenshot type, keyed by type name.
// The Screenshots class passes the selected entry to page.screenshot() and
// uses its `type` as the image subtype of the stored record's Content-Type.
export const screenshotTypes = {
  view: {type: "png", omitBackground: true, fullPage: false},
  thumbnail: {type: "jpeg", omitBackground: true, fullPage: false},
  fullPage: {type: "png", omitBackground: true, fullPage: true},
};
/**
 * Captures screenshots of a single page and appends each one, wrapped in a
 * gzipped WARC "resource" record, to `screenshots.warc.gz` inside `directory`.
 *
 * The capture methods are best-effort: failures are logged through `logger`
 * and never propagate to the caller.
 */
export class Screenshots {
  /**
   * @param {object} params
   * @param {object} params.browser - helper exposing `setViewport(page, viewport)`.
   * @param {object} params.page - object exposing `screenshot(options)`
   *   (presumably a Puppeteer page; confirm against the caller).
   * @param {string} params.url - page URL; used in log messages and as the
   *   tail of each record's `urn:<type>:<url>` target.
   * @param {Date} [params.date] - timestamp for the records (must provide
   *   `toISOString()`); defaults to the construction time.
   * @param {string} params.directory - directory holding the WARC file.
   */
  constructor({browser, page, url, date, directory}) {
    this.browser = browser;
    this.page = page;
    this.url = url;
    this.directory = directory;
    this.warcName = path.join(this.directory, "screenshots.warc.gz");
    this.date = date || new Date();
  }

  /**
   * Capture one screenshot of the given type and append it to the WARC file.
   *
   * The capture is stored as-is; no resizing happens here, so use
   * `takeThumbnail()` to get the 640x360 thumbnail.
   *
   * @param {string} [screenshotType="view"] - a key of `screenshotTypes`.
   * @returns {Promise<void>} resolves once written, or once the failure has
   *   been logged.
   */
  async take(screenshotType = "view") {
    await this._captureToWARC(screenshotType);
  }

  /**
   * Capture a full-page screenshot (shorthand for `take("fullPage")`).
   *
   * @returns {Promise<void>}
   */
  async takeFullPage() {
    await this.take("fullPage");
  }

  /**
   * Capture the viewport and store it scaled down to a 640x360 thumbnail.
   *
   * @returns {Promise<void>}
   */
  async takeThumbnail() {
    await this._captureToWARC("thumbnail", (fullSize) => sharp(fullSize)
      // 16:9 thumbnail
      .resize(640, 360)
      .toBuffer());
  }

  /**
   * Shared capture pipeline: validate the type, size the viewport, take the
   * screenshot, optionally post-process it, write it out and log the result.
   * Any error along the way is logged instead of thrown.
   *
   * @param {string} screenshotType - a key of `screenshotTypes`.
   * @param {?function(*): Promise<*>} [postProcess] - optional async transform
   *   applied to the captured image before it is written.
   * @returns {Promise<void>}
   */
  async _captureToWARC(screenshotType, postProcess = null) {
    try {
      // Own-property check so that inherited names such as "toString" are
      // rejected here, before any viewport change or capture is attempted.
      if (!Object.prototype.hasOwnProperty.call(screenshotTypes, screenshotType)) {
        throw new Error(`Unknown screenshot type "${screenshotType}"`);
      }
      const options = screenshotTypes[screenshotType];

      // Viewport-sized captures get a fixed 1920x1080 viewport; full-page
      // captures leave the current viewport untouched.
      if (!options.fullPage) {
        await this.browser.setViewport(this.page, {width: 1920, height: 1080});
      }

      let imageBuffer = await this.page.screenshot(options);
      if (postProcess) {
        imageBuffer = await postProcess(imageBuffer);
      }

      await this.writeBufferToWARC(imageBuffer, screenshotType, options.type);
      logger.info(`Screenshot (type: ${screenshotType}) for ${this.url} written to ${this.warcName}`);
    } catch (e) {
      logger.error(`Taking screenshot (type: ${screenshotType}) failed for ${this.url}`, e.message);
    }
  }

  /**
   * Wrap an image in a WARC record, serialize it with gzip and append it to
   * the WARC file.
   *
   * @param {*} screenshotBuffer - image bytes to store.
   * @param {string} screenshotType - used in the record's `urn:` target.
   * @param {string} imageType - image subtype for the Content-Type header.
   * @returns {Promise<void>}
   */
  async writeBufferToWARC(screenshotBuffer, screenshotType, imageType) {
    const warcRecord = await this.wrap(screenshotBuffer, screenshotType, imageType);
    const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true});
    // Synchronous append: the whole record is written without yielding to
    // other async work in between.
    fs.appendFileSync(this.warcName, warcRecordBuffer);
  }

  /**
   * Build a WARC/1.1 "resource" record for an image.
   *
   * @param {*} buffer - image bytes, yielded as the record's single content chunk.
   * @param {string} [screenshotType="screenshot"] - namespace part of the
   *   `urn:<screenshotType>:<url>` record target.
   * @param {string} [imageType="png"] - image subtype; the record's
   *   Content-Type becomes `image/<imageType>`.
   * @returns {Promise<*>} whatever `warcio.WARCRecord.create` returns.
   */
  async wrap(buffer, screenshotType = "screenshot", imageType = "png") {
    async function* content() {
      yield buffer;
    }

    const screenshotUrl = `urn:${screenshotType}:${this.url}`;

    return warcio.WARCRecord.create({
      url: screenshotUrl,
      date: this.date.toISOString(),
      type: "resource",
      warcVersion: "WARC/1.1",
      warcHeaders: {"Content-Type": `image/${imageType}`},
    }, content());
  }
}