diff --git a/crawler.js b/crawler.js
index 5ee9f694..56404e71 100644
--- a/crawler.js
+++ b/crawler.js
@@ -353,7 +353,7 @@ export class Crawler {
   }
 
   async setupPage({page, cdp, workerid}) {
-    await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
+    await this.browser.setupPage({page, cdp});
 
     if (this.params.logging.includes("jserrors")) {
       page.on("console", (msg) => {
@@ -648,8 +648,6 @@ export class Crawler {
   }
 
   async crawl() {
-    this.profileDir = await this.browser.loadProfile(this.params.profile);
-
     if (this.params.healthCheckPort) {
       this.healthChecker = new HealthChecker(this.params.healthCheckPort, this.params.workers);
     }
@@ -731,7 +729,7 @@ export class Crawler {
     }
 
     await this.browser.launch({
-      dataDir: this.profileDir,
+      profileUrl: this.params.profile,
       headless: this.params.headless,
       emulateDevice: this.emulateDevice,
       chromeOptions: {
diff --git a/create-login-profile.js b/create-login-profile.js
index 677cb25e..6f89192a 100755
--- a/create-login-profile.js
+++ b/create-login-profile.js
@@ -199,7 +199,8 @@ async function main() {
   await cdp.send("Network.setCacheDisabled", {cacheDisabled: true});
 
   if (!params.automated) {
-    await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
+    await browser.setupPage({page, cdp});
+
     // for testing, inject browsertrix-behaviors
     await page.addInitScript(behaviors + ";\nself.__bx_behaviors.init();");
   }
diff --git a/util/browser.js b/util/browser.js
index c7b1b692..3cf439ea 100644
--- a/util/browser.js
+++ b/util/browser.js
@@ -11,8 +11,6 @@ import { initStorage } from "./storage.js";
 
 import { chromium } from "playwright-core";
 
-const profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
-
 
 // ==================================================================
 export class Browser
@@ -22,16 +20,22 @@ export class Browser
 
     this.firstPage = null;
     this.firstCDP = null;
+
+    this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
+    this.customProfile = false;
   }
 
-  async launch({dataDir, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
+  async launch({profileUrl, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
     if (this.context) {
       logger.warn("Context already inited", {}, "context");
       return this.context;
     }
 
+    if (profileUrl) {
+      this.customProfile = await this.loadProfile(profileUrl);
+    }
+
     const args = this.chromeArgs(chromeOptions);
-    const userDataDir = dataDir || profileDir;
 
     const launchOpts = {
       ...emulateDevice,
@@ -43,10 +47,10 @@ export class Browser
       handleSIGHUP: signals,
       handleSIGINT: signals,
       handleSIGTERM: signals,
-      serviceWorkers: dataDir ? "block" : "allow",
+      serviceWorkers: "allow",
     };
 
-    this.context = await chromium.launchPersistentContext(userDataDir, launchOpts);
+    this.context = await chromium.launchPersistentContext(this.profileDir, launchOpts);
 
     if (this.context.pages()) {
       this.firstPage = this.context.pages()[0];
@@ -65,6 +69,27 @@ export class Browser
     }
   }
 
+  /**
+   * Apply the per-page setup used by both the crawler and the profile creator.
+   *
+   * Registers an init script that redefines `navigator.webdriver` as false and,
+   * when launch() successfully loaded a custom profile, sends
+   * `Network.setBypassServiceWorker` over the page's CDP session.
+   *
+   * @param {object} args
+   * @param {object} args.page - page object exposing `addInitScript()`
+   * @param {object} args.cdp - CDP session exposing `send()`
+   */
+  async setupPage({page, cdp}) {
+    await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
+
+    if (this.customProfile) {
+      logger.info("Disabling Service Workers for profile", {}, "browser");
+
+      await cdp.send("Network.setBypassServiceWorker", {bypass: true});
+    }
+  }
+
   async getFirstPageWithCDP() {
     return {page: this.firstPage, cdp: this.firstCDP};
   }
@@ -126,17 +151,19 @@ export class Browser
 
     if (profileFilename) {
       try {
-        child_process.execSync("tar xvfz " + profileFilename, {cwd: profileDir});
+        // argv form (as in saveProfile) so the filename never passes through a shell
+        child_process.execFileSync("tar", ["xvfz", profileFilename], {cwd: this.profileDir});
+        return true;
       } catch (e) {
         logger.error(`Profile filename ${profileFilename} not a valid tar.gz`);
       }
     }
 
-    return profileDir;
+    return false;
   }
 
   saveProfile(profileFilename) {
-    child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: profileDir});
+    child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: this.profileDir});
   }
 
   chromeArgs({proxy=true, userAgent=null, extraArgs=[]} = {}) {
diff --git a/util/screenshots.js b/util/screenshots.js
index 3bcfa000..09b34bc2 100644
--- a/util/screenshots.js
+++ b/util/screenshots.js
@@ -21,7 +21,7 @@ export const screenshotTypes = {
   "fullPage": {
     type: "png",
     omitBackground: true,
-    fullPage: false
+    fullPage: true
   }
 };
 