Implemented option for full-page screenshot after the behaviors have run (#656)

- New `fullPageFinal` screenshot option, which takes a full-page screenshot after behaviors have run, or before moving on to the next page if behaviors are skipped.

Related to #486

---------
Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
This commit is contained in:
Francesco Servida 2024-11-24 06:26:55 +01:00 committed by GitHub
parent 214eb6ca8f
commit 07e5ceb4c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 30 additions and 3 deletions

View file

@ -1031,6 +1031,23 @@ self.__bx_behaviors.selectMainBehavior();
if (textextract && this.params.text.includes("final-to-warc")) { if (textextract && this.params.text.includes("final-to-warc")) {
await textextract.extractAndStoreText("textFinal", true, true); await textextract.extractAndStoreText("textFinal", true, true);
} }
// Take the optional "fullPageFinal" screenshot: only when screenshots are
// configured, a screenshot writer exists, and "fullPageFinal" was requested.
if (
this.params.screenshot &&
this.screenshotWriter &&
this.params.screenshot.includes("fullPageFinal")
) {
// Reset the page scroll position to the top-left before capturing.
// NOTE(review): presumably needed because behaviors may have scrolled the
// page — confirm.
await page.evaluate(() => {
window.scrollTo(0, 0);
});
// Build a Screenshots helper for this page/url that writes via the
// crawler's screenshot writer.
const screenshots = new Screenshots({
browser: this.browser,
page,
url,
writer: this.screenshotWriter,
});
await screenshots.takeFullPageFinal();
}
} }
} }
} }

View file

@ -390,7 +390,7 @@ class ArgParser {
screenshot: { screenshot: {
describe: describe:
"Screenshot options for crawler, can include: view, thumbnail, fullPage", "Screenshot options for crawler, can include: view, thumbnail, fullPage, fullPageFinal",
type: "array", type: "array",
default: [], default: [],
choices: Array.from(Object.keys(screenshotTypes)), choices: Array.from(Object.keys(screenshotTypes)),

View file

@ -15,7 +15,7 @@ type ScreenShotDesc = {
encoding: "binary"; encoding: "binary";
}; };
type ScreeshotType = "view" | "thumbnail" | "fullPage"; type ScreeshotType = "view" | "thumbnail" | "fullPage" | "fullPageFinal";
export const screenshotTypes: Record<string, ScreenShotDesc> = { export const screenshotTypes: Record<string, ScreenShotDesc> = {
view: { view: {
@ -36,6 +36,12 @@ export const screenshotTypes: Record<string, ScreenShotDesc> = {
fullPage: true, fullPage: true,
encoding: "binary", encoding: "binary",
}, },
fullPageFinal: {
type: "png",
omitBackground: true,
fullPage: true,
encoding: "binary",
},
}; };
export type ScreenshotOpts = { export type ScreenshotOpts = {
@ -63,7 +69,7 @@ export class Screenshots {
state: PageState | null = null, state: PageState | null = null,
) { ) {
try { try {
if (screenshotType !== "fullPage") { if (screenshotType !== "fullPage" && screenshotType !== "fullPageFinal") {
await this.browser.setViewport(this.page, { await this.browser.setViewport(this.page, {
width: 1920, width: 1920,
height: 1080, height: 1080,
@ -105,6 +111,10 @@ export class Screenshots {
await this.take("fullPage"); await this.take("fullPage");
} }
/**
 * Takes a screenshot of type "fullPageFinal" by delegating to `take()`.
 */
async takeFullPageFinal() {
await this.take("fullPageFinal");
}
async takeThumbnail() { async takeThumbnail() {
const screenshotType = "thumbnail"; const screenshotType = "thumbnail";
try { try {