diff --git a/crawler.js b/crawler.js index 5338f841..7af28ae4 100644 --- a/crawler.js +++ b/crawler.js @@ -104,6 +104,8 @@ class Crawler { this.sizeExceeded = false; this.finalExit = false; this.behaviorLastLine = null; + + this.logConsole = false; } statusLog(...args) { @@ -228,6 +230,8 @@ class Crawler { redisStdio = "ignore"; } + this.logConsole = this.params.logging.includes("jserrors"); + this.browserExe = getBrowserExe(); this.configureUA(); @@ -257,6 +261,8 @@ class Crawler { } }); + child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]); + if (!this.params.headless && !process.env.NO_XVFB) { child_process.spawn("Xvfb", [ process.env.DISPLAY, @@ -708,6 +714,14 @@ class Crawler { // more serious page error, mark page session as invalid page.on("error", () => this.markPageFailed(page)); + if (this.logConsole) { + page.on("console", (msg) => { + if (msg.type() === "error") { + console.log(msg.text(), msg.location()); + } + }); + } + const gotoOpts = isHTMLPage ? this.gotoOpts : "domcontentloaded"; try { diff --git a/create-login-profile.js b/create-login-profile.js index e281ece5..4d96df9f 100755 --- a/create-login-profile.js +++ b/create-login-profile.js @@ -119,7 +119,6 @@ async function main() { } const browserArgs = chromeArgs(useProxy, null, [ - "--remote-debugging-port=9221", `--window-size=${params.windowSize}`, ]); diff --git a/util/argParser.js b/util/argParser.js index cf4d8a22..afbc98ec 100644 --- a/util/argParser.js +++ b/util/argParser.js @@ -158,7 +158,7 @@ class ArgParser { }, "logging": { - describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug", + describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug, jserrors", type: "string", default: "stats", }, diff --git a/util/browser.js b/util/browser.js index b980a52b..54e922e9 100644 --- a/util/browser.js +++ b/util/browser.js @@ -84,19 +84,53 @@ function getDefaultUA() { module.exports.getDefaultUA = getDefaultUA; +// from https://github.com/microsoft/playwright/blob/main/packages/playwright-core/src/server/chromium/chromium.ts#L327 +const DEFAULT_PLAYWRIGHT_FLAGS = [ + "--disable-field-trial-config", // https://source.chromium.org/chromium/chromium/src/+/main:testing/variations/README.md + "--disable-background-networking", + "--enable-features=NetworkService,NetworkServiceInProcess", + "--disable-background-timer-throttling", + "--disable-backgrounding-occluded-windows", + "--disable-back-forward-cache", // Avoids surprises like main request not being intercepted during page.goBack(). + "--disable-breakpad", + "--disable-client-side-phishing-detection", + "--disable-component-extensions-with-background-pages", + "--disable-default-apps", + "--disable-dev-shm-usage", + "--disable-extensions", + // AvoidUnnecessaryBeforeUnloadCheckSync - https://github.com/microsoft/playwright/issues/14047 + // Translate - https://github.com/microsoft/playwright/issues/16126 + "--disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate", + "--allow-pre-commit-input", + "--disable-hang-monitor", + "--disable-ipc-flooding-protection", + "--disable-popup-blocking", + "--disable-prompt-on-repost", + "--disable-renderer-backgrounding", + "--disable-sync", + "--force-color-profile=srgb", + "--metrics-recording-only", + "--no-first-run", + "--enable-automation", + "--password-store=basic", + "--use-mock-keychain", + // See https://chromium-review.googlesource.com/c/chromium/src/+/2436773 + "--no-service-autorun", + "--export-tagged-pdf" +]; + + module.exports.chromeArgs = (proxy, userAgent=null, extraArgs=[]) => { // Chrome Flags, including proxy server const args = [ + ...DEFAULT_PLAYWRIGHT_FLAGS, ...(process.env.CHROME_FLAGS ?? "").split(" ").filter(Boolean), - "--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically) + //"--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically) "--no-sandbox", "--disable-background-media-suspend", - "--enable-features=NetworkService,NetworkServiceInProcess", + "--remote-debugging-port=9221", "--autoplay-policy=no-user-gesture-required", - "--disable-features=IsolateOrigins,site-per-process,ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,AcceptCHFrame,AutoExpandDetailsElement", "--disable-site-isolation-trials", - "--disable-popup-blocking", - "--disable-backgrounding-occluded-windows", `--user-agent=${userAgent || getDefaultUA()}`, ...extraArgs, ];