merge regression fixes from 0.9.1: full page screenshot + allow service workers if no profile used (#297)

* browser: just pass profileUrl and track if custom profile is used
browser: don't always disable service workers (this was accidentally added as part of the Playwright migration);
only disable them when a profile is used, same as the 0.8.x behavior
fix for #288

* Fix full page screenshot (#296)
---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2023-04-24 10:26:56 -07:00 committed by GitHub
parent 3c7c7bfbc4
commit d4e222fab2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 15 deletions

View file

@ -353,7 +353,7 @@ export class Crawler {
} }
async setupPage({page, cdp, workerid}) { async setupPage({page, cdp, workerid}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});"); await this.browser.setupPage({page, cdp});
if (this.params.logging.includes("jserrors")) { if (this.params.logging.includes("jserrors")) {
page.on("console", (msg) => { page.on("console", (msg) => {
@ -648,8 +648,6 @@ export class Crawler {
} }
async crawl() { async crawl() {
this.profileDir = await this.browser.loadProfile(this.params.profile);
if (this.params.healthCheckPort) { if (this.params.healthCheckPort) {
this.healthChecker = new HealthChecker(this.params.healthCheckPort, this.params.workers); this.healthChecker = new HealthChecker(this.params.healthCheckPort, this.params.workers);
} }
@ -731,7 +729,7 @@ export class Crawler {
} }
await this.browser.launch({ await this.browser.launch({
dataDir: this.profileDir, profileUrl: this.params.profile,
headless: this.params.headless, headless: this.params.headless,
emulateDevice: this.emulateDevice, emulateDevice: this.emulateDevice,
chromeOptions: { chromeOptions: {

View file

@ -199,7 +199,8 @@ async function main() {
await cdp.send("Network.setCacheDisabled", {cacheDisabled: true}); await cdp.send("Network.setCacheDisabled", {cacheDisabled: true});
if (!params.automated) { if (!params.automated) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});"); await browser.setupPage({page, cdp});
// for testing, inject browsertrix-behaviors // for testing, inject browsertrix-behaviors
await page.addInitScript(behaviors + ";\nself.__bx_behaviors.init();"); await page.addInitScript(behaviors + ";\nself.__bx_behaviors.init();");
} }

View file

@ -11,8 +11,6 @@ import { initStorage } from "./storage.js";
import { chromium } from "playwright-core"; import { chromium } from "playwright-core";
const profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
// ================================================================== // ==================================================================
export class Browser export class Browser
@ -22,16 +20,22 @@ export class Browser
this.firstPage = null; this.firstPage = null;
this.firstCDP = null; this.firstCDP = null;
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
this.customProfile = false;
} }
async launch({dataDir, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) { async launch({profileUrl, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
if (this.context) { if (this.context) {
logger.warn("Context already inited", {}, "context"); logger.warn("Context already inited", {}, "context");
return this.context; return this.context;
} }
if (profileUrl) {
this.customProfile = await this.loadProfile(profileUrl);
}
const args = this.chromeArgs(chromeOptions); const args = this.chromeArgs(chromeOptions);
const userDataDir = dataDir || profileDir;
const launchOpts = { const launchOpts = {
...emulateDevice, ...emulateDevice,
@ -43,10 +47,10 @@ export class Browser
handleSIGHUP: signals, handleSIGHUP: signals,
handleSIGINT: signals, handleSIGINT: signals,
handleSIGTERM: signals, handleSIGTERM: signals,
serviceWorkers: dataDir ? "block" : "allow", serviceWorkers: "allow"
}; };
this.context = await chromium.launchPersistentContext(userDataDir, launchOpts); this.context = await chromium.launchPersistentContext(this.profileDir, launchOpts);
if (this.context.pages()) { if (this.context.pages()) {
this.firstPage = this.context.pages()[0]; this.firstPage = this.context.pages()[0];
@ -65,6 +69,16 @@ export class Browser
} }
} }
// Per-page setup shared by the crawler and the profile-creation entry point.
// Adds an init script that defines navigator.webdriver as false and, when a
// custom profile was loaded, sends Network.setBypassServiceWorker over CDP.
//   page - object whose addInitScript() is called
//   cdp  - object whose send() is called; presumably the CDP session for `page` (confirm against callers)
async setupPage({page, cdp}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
// customProfile is set in launch() from the result of loadProfile(profileUrl);
// it stays false when no profileUrl is passed, so service workers are left alone.
if (this.customProfile) {
logger.info("Disabling Service Workers for profile", {}, "browser");
await cdp.send("Network.setBypassServiceWorker", {bypass: true});
}
}
async getFirstPageWithCDP() { async getFirstPageWithCDP() {
return {page: this.firstPage, cdp: this.firstCDP}; return {page: this.firstPage, cdp: this.firstCDP};
} }
@ -126,17 +140,18 @@ export class Browser
if (profileFilename) { if (profileFilename) {
try { try {
child_process.execSync("tar xvfz " + profileFilename, {cwd: profileDir}); child_process.execSync("tar xvfz " + profileFilename, {cwd: this.profileDir});
return true;
} catch (e) { } catch (e) {
logger.error(`Profile filename ${profileFilename} not a valid tar.gz`); logger.error(`Profile filename ${profileFilename} not a valid tar.gz`);
} }
} }
return profileDir; return false;
} }
saveProfile(profileFilename) { saveProfile(profileFilename) {
child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: profileDir}); child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: this.profileDir});
} }
chromeArgs({proxy=true, userAgent=null, extraArgs=[]} = {}) { chromeArgs({proxy=true, userAgent=null, extraArgs=[]} = {}) {

View file

@ -21,7 +21,7 @@ export const screenshotTypes = {
"fullPage": { "fullPage": {
type: "png", type: "png",
omitBackground: true, omitBackground: true,
fullPage: false fullPage: true
} }
}; };