mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Chrome 112 + new headless mode + consistent viewport tweaks (#316)
* base: update to Chrome 112
  - headless: switch to the new headless mode available in Chrome 112, which is more in sync with headful mode
  - viewport: use a fixed viewport matching the screen dimensions for both headless and headful mode (if GEOMETRY is set)
  - profiles: fix catching the new-window message and reopening the page in the current window
  - versions: bump pywb to 2.7.4, update puppeteer-core to 20.2.1
  - bump version to 0.10.0-beta.4
* profile: force reopening in the current window only for headless mode (currently breaks otherwise); remove logging messages
This commit is contained in:
parent
cc606deba9
commit
f51154facb
7 changed files with 62 additions and 42 deletions
|
@ -1,4 +1,4 @@
|
||||||
ARG BROWSER_VERSION=109
|
ARG BROWSER_VERSION=112
|
||||||
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:chrome-${BROWSER_VERSION}
|
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:chrome-${BROWSER_VERSION}
|
||||||
|
|
||||||
FROM ${BROWSER_IMAGE_BASE}
|
FROM ${BROWSER_IMAGE_BASE}
|
||||||
|
|
|
@ -265,7 +265,7 @@ export class Crawler {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
|
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
|
||||||
|
|
||||||
if (!this.params.headless && !process.env.NO_XVFB) {
|
if (!this.params.headless && !process.env.NO_XVFB) {
|
||||||
child_process.spawn("Xvfb", [
|
child_process.spawn("Xvfb", [
|
||||||
|
|
|
@ -315,14 +315,13 @@ function promptInput(msg, hidden = false) {
|
||||||
class InteractiveBrowser {
|
class InteractiveBrowser {
|
||||||
constructor(params, browser, page, cdp, targetId) {
|
constructor(params, browser, page, cdp, targetId) {
|
||||||
logger.info("Creating Profile Interactively...");
|
logger.info("Creating Profile Interactively...");
|
||||||
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
|
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
|
||||||
|
|
||||||
this.params = params;
|
this.params = params;
|
||||||
this.browser = browser;
|
this.browser = browser;
|
||||||
this.page = page;
|
this.page = page;
|
||||||
this.cdp = cdp;
|
this.cdp = cdp;
|
||||||
|
|
||||||
//const target = page.target();
|
|
||||||
this.targetId = targetId;
|
this.targetId = targetId;
|
||||||
|
|
||||||
this.originSet = new Set();
|
this.originSet = new Set();
|
||||||
|
@ -331,15 +330,17 @@ class InteractiveBrowser {
|
||||||
|
|
||||||
page.on("load", () => this.handlePageLoad());
|
page.on("load", () => this.handlePageLoad());
|
||||||
|
|
||||||
page.on("popup", async () => {
|
// attempt to keep everything to initial tab if headless
|
||||||
await cdp.send("Target.activateTarget", {targetId: this.targetId});
|
if (this.params.headless) {
|
||||||
});
|
cdp.send("Page.enable");
|
||||||
|
|
||||||
cdp.on("Page.windowOpen", async (resp) => {
|
cdp.on("Page.windowOpen", async (resp) => {
|
||||||
if (resp.url) {
|
if (resp.url) {
|
||||||
await page.goto(resp.url);
|
await cdp.send("Target.activateTarget", {targetId: this.targetId});
|
||||||
}
|
await page.goto(resp.url);
|
||||||
});
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
this.shutdownWait = params.shutdownWait * 1000;
|
this.shutdownWait = params.shutdownWait * 1000;
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "browsertrix-crawler",
|
"name": "browsertrix-crawler",
|
||||||
"version": "0.10.0-beta.3",
|
"version": "0.10.0-beta.4",
|
||||||
"main": "browsertrix-crawler",
|
"main": "browsertrix-crawler",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||||
|
@ -17,7 +17,7 @@
|
||||||
"ioredis": "^4.27.1",
|
"ioredis": "^4.27.1",
|
||||||
"js-yaml": "^4.1.0",
|
"js-yaml": "^4.1.0",
|
||||||
"minio": "7.0.26",
|
"minio": "7.0.26",
|
||||||
"puppeteer-core": "^19.11.1",
|
"puppeteer-core": "^20.2.1",
|
||||||
"sharp": "^0.32.1",
|
"sharp": "^0.32.1",
|
||||||
"sitemapper": "^3.1.2",
|
"sitemapper": "^3.1.2",
|
||||||
"uuid": "8.3.2",
|
"uuid": "8.3.2",
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
pywb>=2.7.3
|
pywb>=2.7.4
|
||||||
uwsgi
|
uwsgi
|
||||||
wacz>=0.4.8
|
wacz>=0.4.8
|
||||||
requests[socks]
|
requests[socks]
|
||||||
|
|
|
@ -34,17 +34,25 @@ export class BaseBrowser
|
||||||
|
|
||||||
const args = this.chromeArgs(chromeOptions);
|
const args = this.chromeArgs(chromeOptions);
|
||||||
|
|
||||||
|
let defaultViewport = null;
|
||||||
|
|
||||||
|
if (process.env.GEOMETRY) {
|
||||||
|
const geom = process.env.GEOMETRY.split("x");
|
||||||
|
|
||||||
|
defaultViewport = {width: Number(geom[0]), height: Number(geom[1])};
|
||||||
|
}
|
||||||
|
|
||||||
const launchOpts = {
|
const launchOpts = {
|
||||||
args,
|
args,
|
||||||
headless,
|
headless: headless ? "new" : false,
|
||||||
executablePath: this.getBrowserExe(),
|
executablePath: this.getBrowserExe(),
|
||||||
ignoreDefaultArgs: ["--enable-automation"],
|
ignoreDefaultArgs: ["--enable-automation", "--hide-scrollbars"],
|
||||||
ignoreHTTPSErrors: true,
|
ignoreHTTPSErrors: true,
|
||||||
handleSIGHUP: signals,
|
handleSIGHUP: signals,
|
||||||
handleSIGINT: signals,
|
handleSIGINT: signals,
|
||||||
handleSIGTERM: signals,
|
handleSIGTERM: signals,
|
||||||
|
|
||||||
defaultViewport: null,
|
defaultViewport,
|
||||||
waitForInitialPage: false,
|
waitForInitialPage: false,
|
||||||
userDataDir: this.profileDir
|
userDataDir: this.profileDir
|
||||||
};
|
};
|
||||||
|
@ -114,6 +122,7 @@ export class BaseBrowser
|
||||||
"--no-sandbox",
|
"--no-sandbox",
|
||||||
"--disable-background-media-suspend",
|
"--disable-background-media-suspend",
|
||||||
"--remote-debugging-port=9221",
|
"--remote-debugging-port=9221",
|
||||||
|
"--remote-allow-origins=*",
|
||||||
"--autoplay-policy=no-user-gesture-required",
|
"--autoplay-policy=no-user-gesture-required",
|
||||||
"--disable-site-isolation-trials",
|
"--disable-site-isolation-trials",
|
||||||
`--user-agent=${userAgent || this.getDefaultUA()}`,
|
`--user-agent=${userAgent || this.getDefaultUA()}`,
|
||||||
|
|
58
yarn.lock
58
yarn.lock
|
@ -620,13 +620,14 @@
|
||||||
resolved "https://registry.yarnpkg.com/@novnc/novnc/-/novnc-1.4.0.tgz#68adae81a741624142b518323441e852c1f34281"
|
resolved "https://registry.yarnpkg.com/@novnc/novnc/-/novnc-1.4.0.tgz#68adae81a741624142b518323441e852c1f34281"
|
||||||
integrity sha512-kW6ALMc5BuH08e/ond/I1naYcfjc19JYMN1EdtmgjjjzPGCjW8fMtVM3MwM6q7YLRjPlQ3orEvoKMgSS7RkEVQ==
|
integrity sha512-kW6ALMc5BuH08e/ond/I1naYcfjc19JYMN1EdtmgjjjzPGCjW8fMtVM3MwM6q7YLRjPlQ3orEvoKMgSS7RkEVQ==
|
||||||
|
|
||||||
"@puppeteer/browsers@0.5.0":
|
"@puppeteer/browsers@1.3.0":
|
||||||
version "0.5.0"
|
version "1.3.0"
|
||||||
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-0.5.0.tgz#1a1ee454b84a986b937ca2d93146f25a3fe8b670"
|
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.3.0.tgz#5ad26540ff54e8b8fca8ab50d2da9c60360a21b9"
|
||||||
integrity sha512-Uw6oB7VvmPRLE4iKsjuOh8zgDabhNX67dzo8U/BB0f9527qx+4eeUs+korU98OhG5C4ubg7ufBgVi63XYwS6TQ==
|
integrity sha512-an3QdbNPkuU6qpxpbssxAbjRLJcF+eP4L8UqIY3+6n0sbaVxw5pz7PiCLy9g32XEZuoamUlV5ZQPnA6FxvkIHA==
|
||||||
dependencies:
|
dependencies:
|
||||||
debug "4.3.4"
|
debug "4.3.4"
|
||||||
extract-zip "2.0.1"
|
extract-zip "2.0.1"
|
||||||
|
http-proxy-agent "5.0.0"
|
||||||
https-proxy-agent "5.0.1"
|
https-proxy-agent "5.0.1"
|
||||||
progress "2.0.3"
|
progress "2.0.3"
|
||||||
proxy-from-env "1.1.0"
|
proxy-from-env "1.1.0"
|
||||||
|
@ -665,6 +666,11 @@
|
||||||
dependencies:
|
dependencies:
|
||||||
defer-to-connect "^2.0.0"
|
defer-to-connect "^2.0.0"
|
||||||
|
|
||||||
|
"@tootallnate/once@2":
|
||||||
|
version "2.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-2.0.0.tgz#f544a148d3ab35801c1f633a7441fd87c2e484bf"
|
||||||
|
integrity sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==
|
||||||
|
|
||||||
"@types/babel__core@^7.1.14":
|
"@types/babel__core@^7.1.14":
|
||||||
version "7.1.19"
|
version "7.1.19"
|
||||||
resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.19.tgz#7b497495b7d1b4812bdb9d02804d0576f43ee460"
|
resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.19.tgz#7b497495b7d1b4812bdb9d02804d0576f43ee460"
|
||||||
|
@ -1234,10 +1240,10 @@ chownr@^1.1.1:
|
||||||
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
|
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
|
||||||
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
|
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
|
||||||
|
|
||||||
chromium-bidi@0.4.7:
|
chromium-bidi@0.4.9:
|
||||||
version "0.4.7"
|
version "0.4.9"
|
||||||
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.7.tgz#4c022c2b0fb1d1c9b571fadf373042160e71d236"
|
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.9.tgz#a1c6d7497e2b8ae3d639fd69dacb25025fa0a696"
|
||||||
integrity sha512-6+mJuFXwTMU6I3vYLs6IL8A1DyQTPjCfIL971X0aMPVGRbGnNfl6i6Cl0NMbxi2bRYLGESt9T2ZIMRM5PAEcIQ==
|
integrity sha512-u3DC6XwgLCA9QJ5ak1voPslCmacQdulZNCPsI3qNXxSnEcZS7DFIbww+5RM2bznMEje7cc0oydavRLRvOIZtHw==
|
||||||
dependencies:
|
dependencies:
|
||||||
mitt "3.0.0"
|
mitt "3.0.0"
|
||||||
|
|
||||||
|
@ -1510,10 +1516,10 @@ detect-newline@^3.0.0:
|
||||||
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
|
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
|
||||||
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
|
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
|
||||||
|
|
||||||
devtools-protocol@0.0.1107588:
|
devtools-protocol@0.0.1120988:
|
||||||
version "0.0.1107588"
|
version "0.0.1120988"
|
||||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1107588.tgz#f8cac707840b97cc30b029359341bcbbb0ad8ffa"
|
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1120988.tgz#8fe49088919ae3b8df7235774633763f1f925066"
|
||||||
integrity sha512-yIR+pG9x65Xko7bErCUSQaDLrO/P1p3JUzEk7JCU4DowPcGHkTGUGQapcfcLc4qj0UaALwZ+cr0riFgiqpixcg==
|
integrity sha512-39fCpE3Z78IaIPChJsP6Lhmkbf4dWXOmzLk/KFTdRkNk/0JymRIfUynDVRndV9HoDz8PyalK1UH21ST/ivwW5Q==
|
||||||
|
|
||||||
diff-sequences@^29.2.0:
|
diff-sequences@^29.2.0:
|
||||||
version "29.2.0"
|
version "29.2.0"
|
||||||
|
@ -2148,6 +2154,15 @@ http-cache-semantics@^4.0.0:
|
||||||
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390"
|
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390"
|
||||||
integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==
|
integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==
|
||||||
|
|
||||||
|
http-proxy-agent@5.0.0:
|
||||||
|
version "5.0.0"
|
||||||
|
resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz#5129800203520d434f142bc78ff3c170800f2b43"
|
||||||
|
integrity sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==
|
||||||
|
dependencies:
|
||||||
|
"@tootallnate/once" "2"
|
||||||
|
agent-base "6"
|
||||||
|
debug "4"
|
||||||
|
|
||||||
http2-wrapper@^1.0.0-beta.5.2:
|
http2-wrapper@^1.0.0-beta.5.2:
|
||||||
version "1.0.3"
|
version "1.0.3"
|
||||||
resolved "https://registry.yarnpkg.com/http2-wrapper/-/http2-wrapper-1.0.3.tgz#b8f55e0c1f25d4ebd08b3b0c2c079f9590800b3d"
|
resolved "https://registry.yarnpkg.com/http2-wrapper/-/http2-wrapper-1.0.3.tgz#b8f55e0c1f25d4ebd08b3b0c2c079f9590800b3d"
|
||||||
|
@ -3495,21 +3510,16 @@ punycode@^2.1.0:
|
||||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
|
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
|
||||||
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
|
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
|
||||||
|
|
||||||
puppeteer-core@^19.11.1:
|
puppeteer-core@^20.2.1:
|
||||||
version "19.11.1"
|
version "20.2.1"
|
||||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-19.11.1.tgz#4c63d7a0a6cd268ff054ebcac315b646eee32667"
|
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.2.1.tgz#e920e06e76ef2d1a1d98ea847611a8d662d3cd97"
|
||||||
integrity sha512-qcuC2Uf0Fwdj9wNtaTZ2OvYRraXpAK+puwwVW8ofOhOgLPZyz1c68tsorfIZyCUOpyBisjr+xByu7BMbEYMepA==
|
integrity sha512-HmNMcL+g9ght0nCzS5BmEvom2IVhp1/xhlQ9O+fmihQx0EYwYUqgCQhs8Hbv7IapiqKhahKCC8PKfCNTXXsKbQ==
|
||||||
dependencies:
|
dependencies:
|
||||||
"@puppeteer/browsers" "0.5.0"
|
"@puppeteer/browsers" "1.3.0"
|
||||||
chromium-bidi "0.4.7"
|
chromium-bidi "0.4.9"
|
||||||
cross-fetch "3.1.5"
|
cross-fetch "3.1.5"
|
||||||
debug "4.3.4"
|
debug "4.3.4"
|
||||||
devtools-protocol "0.0.1107588"
|
devtools-protocol "0.0.1120988"
|
||||||
extract-zip "2.0.1"
|
|
||||||
https-proxy-agent "5.0.1"
|
|
||||||
proxy-from-env "1.1.0"
|
|
||||||
tar-fs "2.1.1"
|
|
||||||
unbzip2-stream "1.4.3"
|
|
||||||
ws "8.13.0"
|
ws "8.13.0"
|
||||||
|
|
||||||
querystring@0.2.0:
|
querystring@0.2.0:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue