merge regression fixes from 0.9.1: full page screenshot + allow service workers if no profile used (#297)

* browser: just pass profileUrl and track if custom profile is used
browser: don't always disable service workers (disabling was accidentally added as part of the Playwright migration);
only disable them when a profile is used, same as 0.8.x behavior
fix for #288

* Fix full page screenshot (#296)
---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2023-04-24 10:26:56 -07:00 committed by GitHub
parent 3c7c7bfbc4
commit d4e222fab2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 29 additions and 15 deletions

View file

@ -353,7 +353,7 @@ export class Crawler {
}
async setupPage({page, cdp, workerid}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
await this.browser.setupPage({page, cdp});
if (this.params.logging.includes("jserrors")) {
page.on("console", (msg) => {
@ -648,8 +648,6 @@ export class Crawler {
}
async crawl() {
this.profileDir = await this.browser.loadProfile(this.params.profile);
if (this.params.healthCheckPort) {
this.healthChecker = new HealthChecker(this.params.healthCheckPort, this.params.workers);
}
@ -731,7 +729,7 @@ export class Crawler {
}
await this.browser.launch({
dataDir: this.profileDir,
profileUrl: this.params.profile,
headless: this.params.headless,
emulateDevice: this.emulateDevice,
chromeOptions: {

View file

@ -199,7 +199,8 @@ async function main() {
await cdp.send("Network.setCacheDisabled", {cacheDisabled: true});
if (!params.automated) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
await browser.setupPage({page, cdp});
// for testing, inject browsertrix-behaviors
await page.addInitScript(behaviors + ";\nself.__bx_behaviors.init();");
}

View file

@ -11,8 +11,6 @@ import { initStorage } from "./storage.js";
import { chromium } from "playwright-core";
const profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
// ==================================================================
export class Browser
@ -22,16 +20,22 @@ export class Browser
this.firstPage = null;
this.firstCDP = null;
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
this.customProfile = false;
}
async launch({dataDir, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
async launch({profileUrl, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
if (this.context) {
logger.warn("Context already inited", {}, "context");
return this.context;
}
if (profileUrl) {
this.customProfile = await this.loadProfile(profileUrl);
}
const args = this.chromeArgs(chromeOptions);
const userDataDir = dataDir || profileDir;
const launchOpts = {
...emulateDevice,
@ -43,10 +47,10 @@ export class Browser
handleSIGHUP: signals,
handleSIGINT: signals,
handleSIGTERM: signals,
serviceWorkers: dataDir ? "block" : "allow",
serviceWorkers: "allow"
};
this.context = await chromium.launchPersistentContext(userDataDir, launchOpts);
this.context = await chromium.launchPersistentContext(this.profileDir, launchOpts);
if (this.context.pages()) {
this.firstPage = this.context.pages()[0];
@ -65,6 +69,16 @@ export class Browser
}
}
async setupPage({page, cdp}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
if (this.customProfile) {
logger.info("Disabling Service Workers for profile", {}, "browser");
await cdp.send("Network.setBypassServiceWorker", {bypass: true});
}
}
async getFirstPageWithCDP() {
return {page: this.firstPage, cdp: this.firstCDP};
}
@ -126,17 +140,18 @@ export class Browser
if (profileFilename) {
try {
child_process.execSync("tar xvfz " + profileFilename, {cwd: profileDir});
child_process.execSync("tar xvfz " + profileFilename, {cwd: this.profileDir});
return true;
} catch (e) {
logger.error(`Profile filename ${profileFilename} not a valid tar.gz`);
}
}
return profileDir;
return false;
}
saveProfile(profileFilename) {
child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: profileDir});
child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: this.profileDir});
}
chromeArgs({proxy=true, userAgent=null, extraArgs=[]} = {}) {

View file

@ -21,7 +21,7 @@ export const screenshotTypes = {
"fullPage": {
type: "png",
omitBackground: true,
fullPage: false
fullPage: true
}
};