browser: just pass profileUrl and track if custom profile is used

browser: don't always disable service workers (this was accidentally added as part of the playwright migration)
only disable them when a custom profile is used, same as the 0.8.x behavior
potential fix for #288
bump version to 0.9.1
This commit is contained in:
Ilya Kreymer 2023-04-22 09:56:31 -07:00
parent ebdf0ac8f8
commit 6531d52e2c
4 changed files with 29 additions and 15 deletions

View file

@ -347,7 +347,7 @@ export class Crawler {
}
async setupPage({page, cdp, workerid}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
await this.browser.setupPage({page, cdp});
if (this.params.logging.includes("jserrors")) {
page.on("console", (msg) => {
@ -644,8 +644,6 @@ export class Crawler {
}
async crawl() {
this.profileDir = await this.browser.loadProfile(this.params.profile);
if (this.params.healthCheckPort) {
this.healthChecker = new HealthChecker(this.params.healthCheckPort, this.params.workers);
}
@ -723,7 +721,7 @@ export class Crawler {
}
await this.browser.launch({
dataDir: this.profileDir,
profileUrl: this.params.profile,
headless: this.params.headless,
emulateDevice: this.emulateDevice,
chromeOptions: {

View file

@ -199,7 +199,8 @@ async function main() {
await cdp.send("Network.setCacheDisabled", {cacheDisabled: true});
if (!params.automated) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
await browser.setupPage({page, cdp});
// for testing, inject browsertrix-behaviors
await page.addInitScript(behaviors + ";\nself.__bx_behaviors.init();");
}

View file

@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "0.9.0",
"version": "0.9.1",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",

View file

@ -11,8 +11,6 @@ import { initStorage } from "./storage.js";
import { chromium } from "playwright-core";
const profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
// ==================================================================
export class Browser
@ -22,16 +20,22 @@ export class Browser
this.firstPage = null;
this.firstCDP = null;
this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
this.customProfile = false;
}
async launch({dataDir, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
async launch({profileUrl, chromeOptions, signals = false, headless = false, emulateDevice = {viewport: null}} = {}) {
if (this.context) {
logger.warn("Context already inited", {}, "context");
return this.context;
}
if (profileUrl) {
this.customProfile = await this.loadProfile(profileUrl);
}
const args = this.chromeArgs(chromeOptions);
const userDataDir = dataDir || profileDir;
const launchOpts = {
...emulateDevice,
@ -43,10 +47,10 @@ export class Browser
handleSIGHUP: signals,
handleSIGINT: signals,
handleSIGTERM: signals,
serviceWorkers: dataDir ? "block" : "allow",
serviceWorkers: "allow"
};
this.context = await chromium.launchPersistentContext(userDataDir, launchOpts);
this.context = await chromium.launchPersistentContext(this.profileDir, launchOpts);
if (this.context.pages()) {
this.firstPage = this.context.pages()[0];
@ -65,6 +69,16 @@ export class Browser
}
}
async setupPage({page, cdp}) {
await page.addInitScript("Object.defineProperty(navigator, \"webdriver\", {value: false});");
if (this.customProfile) {
logger.info("Disabling Service Workers for profile", {}, "browser");
await cdp.send("Network.setBypassServiceWorker", {bypass: true});
}
}
async getFirstPageWithCDP() {
return {page: this.firstPage, cdp: this.firstCDP};
}
@ -126,17 +140,18 @@ export class Browser
if (profileFilename) {
try {
child_process.execSync("tar xvfz " + profileFilename, {cwd: profileDir});
child_process.execSync("tar xvfz " + profileFilename, {cwd: this.profileDir});
return true;
} catch (e) {
logger.error(`Profile filename ${profileFilename} not a valid tar.gz`);
}
}
return profileDir;
return false;
}
saveProfile(profileFilename) {
child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: profileDir});
child_process.execFileSync("tar", ["cvfz", profileFilename, "./"], {cwd: this.profileDir});
}
chromeArgs({proxy=true, userAgent=null, extraArgs=[]} = {}) {