2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import fs from "fs";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import path from "path";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import * as warcio from "warcio";
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-19 14:30:24 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import sharp from "sharp";
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-03-17 14:24:44 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import { logger } from "./logger.js";
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								// ============================================================================
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								export const screenshotTypes = {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  "view": {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    type: "png",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    omitBackground: true,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fullPage: false
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  "thumbnail": {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    type: "jpeg",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    omitBackground: true,
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-19 14:30:24 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    fullPage: false
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  },
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  "fullPage": {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    type: "png",
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    omitBackground: true,
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-24 10:26:56 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    fullPage: true
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								};
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								export class Screenshots {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-26 15:41:35 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								  constructor({browser, page, url, date, directory}) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.browser = browser;
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.page = page;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.url = url;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.directory = directory;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.warcName = path.join(this.directory, "screenshots.warc.gz");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    this.date = date ? date : new Date();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  async take(screenshotType="view") {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    try {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-07 20:42:06 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      if (screenshotType !== "fullPage") {
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-26 15:41:35 -07:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        await this.browser.setViewport(this.page, {width: 1920, height: 1080});
							 | 
						
					
						
							
								
									
										
										
										
											2023-04-07 20:42:06 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      }
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const options = screenshotTypes[screenshotType];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const screenshotBuffer = await this.page.screenshot(options);
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-19 14:30:24 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      await this.writeBufferToWARC(screenshotBuffer, screenshotType, options.type);
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-15 12:38:41 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      logger.info(`Screenshot (type: ${screenshotType}) for ${this.url} written to ${this.warcName}`);
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } catch (e) {
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-15 12:38:41 -05:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								      logger.error(`Taking screenshot (type: ${screenshotType}) failed for ${this.url}`, e.message);
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  async takeFullPage() {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    await this.take("fullPage");
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  async takeThumbnail() {
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-19 14:30:24 -04:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    try {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const screenshotType = "thumbnail";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      await this.browser.setViewport(this.page, {width: 1920, height: 1080});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const options = screenshotTypes[screenshotType];
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const screenshotBuffer = await this.page.screenshot(options);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      const thumbnailBuffer = await sharp(screenshotBuffer)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        // 16:9 thumbnail
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        .resize(640, 360)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        .toBuffer();
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      await this.writeBufferToWARC(thumbnailBuffer, screenshotType, options.type);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      logger.info(`Screenshot (type: thumbnail) for ${this.url} written to ${this.warcName}`);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    } catch (e) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      logger.error(`Taking screenshot (type: thumbnail) failed for ${this.url}`, e.message);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  async writeBufferToWARC(screenshotBuffer, screenshotType, imageType) {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const warcRecord = await this.wrap(screenshotBuffer, screenshotType, imageType);
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const warcRecordBuffer = await warcio.WARCSerializer.serialize(warcRecord, {gzip: true});
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    fs.appendFileSync(this.warcName, warcRecordBuffer);
							 | 
						
					
						
							
								
									
										
										
										
											2022-12-21 12:06:13 -05:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  async wrap(buffer, screenshotType="screenshot", imageType="png") {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const warcVersion = "WARC/1.1";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const warcRecordType = "resource";
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    const warcHeaders = {"Content-Type": `image/${imageType}`};
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    async function* content() {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      yield buffer;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    let screenshotUrl = `urn:${screenshotType}:` + this.url;
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return warcio.WARCRecord.create({
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      url: screenshotUrl,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      date: this.date.toISOString(),
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      type: warcRecordType,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      warcVersion,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								      warcHeaders}, content());
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								  }
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |