Chrome 112 + new headless mode + consistent viewport tweaks (#316)

* base: update to chrome 112
headless: switch to the new headless mode available in Chrome 112, which is more in sync with headful mode
viewport: use fixed viewport matching screen dimensions for headless and headful mode (if GEOMETRY is set)
profiles: fix catching new window message, reopening page in current window
versions: bump pywb to 2.7.4, update puppeteer-core to 20.2.1
bump to 0.10.0-beta.4

* profile: force reopen in current window only for headless mode (currently breaks otherwise), remove logging messages
This commit is contained in:
Ilya Kreymer 2023-05-22 16:24:39 -07:00 committed by GitHub
parent cc606deba9
commit f51154facb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 62 additions and 42 deletions

View file

@ -1,4 +1,4 @@
ARG BROWSER_VERSION=109
ARG BROWSER_VERSION=112
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:chrome-${BROWSER_VERSION}
FROM ${BROWSER_IMAGE_BASE}

View file

@ -265,7 +265,7 @@ export class Crawler {
}
});
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
if (!this.params.headless && !process.env.NO_XVFB) {
child_process.spawn("Xvfb", [

View file

@ -315,14 +315,13 @@ function promptInput(msg, hidden = false) {
class InteractiveBrowser {
constructor(params, browser, page, cdp, targetId) {
logger.info("Creating Profile Interactively...");
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
this.params = params;
this.browser = browser;
this.page = page;
this.cdp = cdp;
//const target = page.target();
this.targetId = targetId;
this.originSet = new Set();
@ -331,15 +330,17 @@ class InteractiveBrowser {
page.on("load", () => this.handlePageLoad());
page.on("popup", async () => {
await cdp.send("Target.activateTarget", {targetId: this.targetId});
});
// attempt to keep everything to initial tab if headless
if (this.params.headless) {
cdp.send("Page.enable");
cdp.on("Page.windowOpen", async (resp) => {
if (resp.url) {
await page.goto(resp.url);
}
});
cdp.on("Page.windowOpen", async (resp) => {
if (resp.url) {
await cdp.send("Target.activateTarget", {targetId: this.targetId});
await page.goto(resp.url);
}
});
}
this.shutdownWait = params.shutdownWait * 1000;

View file

@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "0.10.0-beta.3",
"version": "0.10.0-beta.4",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",
@ -17,7 +17,7 @@
"ioredis": "^4.27.1",
"js-yaml": "^4.1.0",
"minio": "7.0.26",
"puppeteer-core": "^19.11.1",
"puppeteer-core": "^20.2.1",
"sharp": "^0.32.1",
"sitemapper": "^3.1.2",
"uuid": "8.3.2",

View file

@ -1,4 +1,4 @@
pywb>=2.7.3
pywb>=2.7.4
uwsgi
wacz>=0.4.8
requests[socks]

View file

@ -34,17 +34,25 @@ export class BaseBrowser
const args = this.chromeArgs(chromeOptions);
let defaultViewport = null;
if (process.env.GEOMETRY) {
const geom = process.env.GEOMETRY.split("x");
defaultViewport = {width: Number(geom[0]), height: Number(geom[1])};
}
const launchOpts = {
args,
headless,
headless: headless ? "new" : false,
executablePath: this.getBrowserExe(),
ignoreDefaultArgs: ["--enable-automation"],
ignoreDefaultArgs: ["--enable-automation", "--hide-scrollbars"],
ignoreHTTPSErrors: true,
handleSIGHUP: signals,
handleSIGINT: signals,
handleSIGTERM: signals,
defaultViewport: null,
defaultViewport,
waitForInitialPage: false,
userDataDir: this.profileDir
};
@ -114,6 +122,7 @@ export class BaseBrowser
"--no-sandbox",
"--disable-background-media-suspend",
"--remote-debugging-port=9221",
"--remote-allow-origins=*",
"--autoplay-policy=no-user-gesture-required",
"--disable-site-isolation-trials",
`--user-agent=${userAgent || this.getDefaultUA()}`,

View file

@ -620,13 +620,14 @@
resolved "https://registry.yarnpkg.com/@novnc/novnc/-/novnc-1.4.0.tgz#68adae81a741624142b518323441e852c1f34281"
integrity sha512-kW6ALMc5BuH08e/ond/I1naYcfjc19JYMN1EdtmgjjjzPGCjW8fMtVM3MwM6q7YLRjPlQ3orEvoKMgSS7RkEVQ==
"@puppeteer/browsers@0.5.0":
version "0.5.0"
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-0.5.0.tgz#1a1ee454b84a986b937ca2d93146f25a3fe8b670"
integrity sha512-Uw6oB7VvmPRLE4iKsjuOh8zgDabhNX67dzo8U/BB0f9527qx+4eeUs+korU98OhG5C4ubg7ufBgVi63XYwS6TQ==
"@puppeteer/browsers@1.3.0":
version "1.3.0"
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.3.0.tgz#5ad26540ff54e8b8fca8ab50d2da9c60360a21b9"
integrity sha512-an3QdbNPkuU6qpxpbssxAbjRLJcF+eP4L8UqIY3+6n0sbaVxw5pz7PiCLy9g32XEZuoamUlV5ZQPnA6FxvkIHA==
dependencies:
debug "4.3.4"
extract-zip "2.0.1"
http-proxy-agent "5.0.0"
https-proxy-agent "5.0.1"
progress "2.0.3"
proxy-from-env "1.1.0"
@ -665,6 +666,11 @@
dependencies:
defer-to-connect "^2.0.0"
"@tootallnate/once@2":
version "2.0.0"
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-2.0.0.tgz#f544a148d3ab35801c1f633a7441fd87c2e484bf"
integrity sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==
"@types/babel__core@^7.1.14":
version "7.1.19"
resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.19.tgz#7b497495b7d1b4812bdb9d02804d0576f43ee460"
@ -1234,10 +1240,10 @@ chownr@^1.1.1:
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
chromium-bidi@0.4.7:
version "0.4.7"
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.7.tgz#4c022c2b0fb1d1c9b571fadf373042160e71d236"
integrity sha512-6+mJuFXwTMU6I3vYLs6IL8A1DyQTPjCfIL971X0aMPVGRbGnNfl6i6Cl0NMbxi2bRYLGESt9T2ZIMRM5PAEcIQ==
chromium-bidi@0.4.9:
version "0.4.9"
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.9.tgz#a1c6d7497e2b8ae3d639fd69dacb25025fa0a696"
integrity sha512-u3DC6XwgLCA9QJ5ak1voPslCmacQdulZNCPsI3qNXxSnEcZS7DFIbww+5RM2bznMEje7cc0oydavRLRvOIZtHw==
dependencies:
mitt "3.0.0"
@ -1510,10 +1516,10 @@ detect-newline@^3.0.0:
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
devtools-protocol@0.0.1107588:
version "0.0.1107588"
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1107588.tgz#f8cac707840b97cc30b029359341bcbbb0ad8ffa"
integrity sha512-yIR+pG9x65Xko7bErCUSQaDLrO/P1p3JUzEk7JCU4DowPcGHkTGUGQapcfcLc4qj0UaALwZ+cr0riFgiqpixcg==
devtools-protocol@0.0.1120988:
version "0.0.1120988"
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1120988.tgz#8fe49088919ae3b8df7235774633763f1f925066"
integrity sha512-39fCpE3Z78IaIPChJsP6Lhmkbf4dWXOmzLk/KFTdRkNk/0JymRIfUynDVRndV9HoDz8PyalK1UH21ST/ivwW5Q==
diff-sequences@^29.2.0:
version "29.2.0"
@ -2148,6 +2154,15 @@ http-cache-semantics@^4.0.0:
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390"
integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==
http-proxy-agent@5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz#5129800203520d434f142bc78ff3c170800f2b43"
integrity sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==
dependencies:
"@tootallnate/once" "2"
agent-base "6"
debug "4"
http2-wrapper@^1.0.0-beta.5.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/http2-wrapper/-/http2-wrapper-1.0.3.tgz#b8f55e0c1f25d4ebd08b3b0c2c079f9590800b3d"
@ -3495,21 +3510,16 @@ punycode@^2.1.0:
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
puppeteer-core@^19.11.1:
version "19.11.1"
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-19.11.1.tgz#4c63d7a0a6cd268ff054ebcac315b646eee32667"
integrity sha512-qcuC2Uf0Fwdj9wNtaTZ2OvYRraXpAK+puwwVW8ofOhOgLPZyz1c68tsorfIZyCUOpyBisjr+xByu7BMbEYMepA==
puppeteer-core@^20.2.1:
version "20.2.1"
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.2.1.tgz#e920e06e76ef2d1a1d98ea847611a8d662d3cd97"
integrity sha512-HmNMcL+g9ght0nCzS5BmEvom2IVhp1/xhlQ9O+fmihQx0EYwYUqgCQhs8Hbv7IapiqKhahKCC8PKfCNTXXsKbQ==
dependencies:
"@puppeteer/browsers" "0.5.0"
chromium-bidi "0.4.7"
"@puppeteer/browsers" "1.3.0"
chromium-bidi "0.4.9"
cross-fetch "3.1.5"
debug "4.3.4"
devtools-protocol "0.0.1107588"
extract-zip "2.0.1"
https-proxy-agent "5.0.1"
proxy-from-env "1.1.0"
tar-fs "2.1.1"
unbzip2-stream "1.4.3"
devtools-protocol "0.0.1120988"
ws "8.13.0"
querystring@0.2.0: