Logging and browser improvements: (#158)

* logging: add 'jserrors' option to --logging to print JS errors
* browser config: use flags from playwright
* browser: use socat to forward port 9222 to the browser's remote debugging port (9221), allowing devtools to connect while crawling
This commit is contained in:
Ilya Kreymer 2022-08-21 00:30:25 -07:00 committed by GitHub
parent 6cc38bf511
commit e22d95e2f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 7 deletions

View file

@ -104,6 +104,8 @@ class Crawler {
this.sizeExceeded = false;
this.finalExit = false;
this.behaviorLastLine = null;
this.logConsole = false;
}
statusLog(...args) {
@ -228,6 +230,8 @@ class Crawler {
redisStdio = "ignore";
}
this.logConsole = this.params.logging.includes("jserrors");
this.browserExe = getBrowserExe();
this.configureUA();
@ -257,6 +261,8 @@ class Crawler {
}
});
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
if (!this.params.headless && !process.env.NO_XVFB) {
child_process.spawn("Xvfb", [
process.env.DISPLAY,
@ -708,6 +714,14 @@ class Crawler {
// more serious page error, mark page session as invalid
page.on("error", () => this.markPageFailed(page));
if (this.logConsole) {
page.on("console", (msg) => {
if (msg.type() === "error") {
console.log(msg.text(), msg.location());
}
});
}
const gotoOpts = isHTMLPage ? this.gotoOpts : "domcontentloaded";
try {

View file

@ -119,7 +119,6 @@ async function main() {
}
const browserArgs = chromeArgs(useProxy, null, [
"--remote-debugging-port=9221",
`--window-size=${params.windowSize}`,
]);

View file

@ -158,7 +158,7 @@ class ArgParser {
},
"logging": {
describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug",
describe: "Logging options for crawler, can include: stats, pywb, behaviors, behaviors-debug, jserrors",
type: "string",
default: "stats",
},

View file

@ -84,19 +84,53 @@ function getDefaultUA() {
module.exports.getDefaultUA = getDefaultUA;
// Baseline Chromium command-line switches, copied from Playwright's Chromium launcher:
// from https://github.com/microsoft/playwright/blob/main/packages/playwright-core/src/server/chromium/chromium.ts#L327
// NOTE(review): the link targets the `main` branch, so the `#L327` anchor may have drifted from this list.
// chromeArgs() spreads this array first, ahead of any CHROME_FLAGS from the environment
// and the crawler's own switches.
const DEFAULT_PLAYWRIGHT_FLAGS = [
"--disable-field-trial-config", // https://source.chromium.org/chromium/chromium/src/+/main:testing/variations/README.md
"--disable-background-networking",
"--enable-features=NetworkService,NetworkServiceInProcess",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-back-forward-cache", // Avoids surprises like main request not being intercepted during page.goBack().
"--disable-breakpad",
"--disable-client-side-phishing-detection",
"--disable-component-extensions-with-background-pages",
"--disable-default-apps",
"--disable-dev-shm-usage",
"--disable-extensions",
// AvoidUnnecessaryBeforeUnloadCheckSync - https://github.com/microsoft/playwright/issues/14047
// Translate - https://github.com/microsoft/playwright/issues/16126
"--disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate",
"--allow-pre-commit-input",
"--disable-hang-monitor",
"--disable-ipc-flooding-protection",
"--disable-popup-blocking",
"--disable-prompt-on-repost",
"--disable-renderer-backgrounding",
"--disable-sync",
"--force-color-profile=srgb",
"--metrics-recording-only",
"--no-first-run",
"--enable-automation",
"--password-store=basic",
"--use-mock-keychain",
// See https://chromium-review.googlesource.com/c/chromium/src/+/2436773
"--no-service-autorun",
"--export-tagged-pdf"
];
module.exports.chromeArgs = (proxy, userAgent=null, extraArgs=[]) => {
// Chrome Flags, including proxy server
const args = [
...DEFAULT_PLAYWRIGHT_FLAGS,
...(process.env.CHROME_FLAGS ?? "").split(" ").filter(Boolean),
"--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically)
//"--no-xshm", // needed for Chrome >80 (check if puppeteer adds automatically)
"--no-sandbox",
"--disable-background-media-suspend",
"--enable-features=NetworkService,NetworkServiceInProcess",
"--remote-debugging-port=9221",
"--autoplay-policy=no-user-gesture-required",
"--disable-features=IsolateOrigins,site-per-process,ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,AcceptCHFrame,AutoExpandDetailsElement",
"--disable-site-isolation-trials",
"--disable-popup-blocking",
"--disable-backgrounding-occluded-windows",
`--user-agent=${userAgent || getDefaultUA()}`,
...extraArgs,
];