mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Chrome 112 + new headless mode + consistent viewport tweaks (#316)
* base:
  - update to Chrome 112
  - headless: switch to the new headless mode available in Chrome 112, which is more in sync with headful mode
  - viewport: use a fixed viewport matching the screen dimensions for both headless and headful mode (if GEOMETRY is set)
  - profiles: fix catching the new-window message and reopening the page in the current window
  - versions: bump pywb to 2.7.4, update puppeteer-core to 20.2.1
  - bump version to 0.10.0-beta.4
* profile: force reopening in the current window only in headless mode (it currently breaks otherwise); remove logging messages
This commit is contained in:
parent
cc606deba9
commit
f51154facb
7 changed files with 62 additions and 42 deletions
|
@ -1,4 +1,4 @@
|
|||
ARG BROWSER_VERSION=109
|
||||
ARG BROWSER_VERSION=112
|
||||
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:chrome-${BROWSER_VERSION}
|
||||
|
||||
FROM ${BROWSER_IMAGE_BASE}
|
||||
|
|
|
@ -265,7 +265,7 @@ export class Crawler {
|
|||
}
|
||||
});
|
||||
|
||||
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
|
||||
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
|
||||
|
||||
if (!this.params.headless && !process.env.NO_XVFB) {
|
||||
child_process.spawn("Xvfb", [
|
||||
|
|
|
@ -315,14 +315,13 @@ function promptInput(msg, hidden = false) {
|
|||
class InteractiveBrowser {
|
||||
constructor(params, browser, page, cdp, targetId) {
|
||||
logger.info("Creating Profile Interactively...");
|
||||
child_process.spawn("socat", ["tcp-listen:9222,fork", "tcp:localhost:9221"]);
|
||||
child_process.spawn("socat", ["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"]);
|
||||
|
||||
this.params = params;
|
||||
this.browser = browser;
|
||||
this.page = page;
|
||||
this.cdp = cdp;
|
||||
|
||||
//const target = page.target();
|
||||
this.targetId = targetId;
|
||||
|
||||
this.originSet = new Set();
|
||||
|
@ -331,15 +330,17 @@ class InteractiveBrowser {
|
|||
|
||||
page.on("load", () => this.handlePageLoad());
|
||||
|
||||
page.on("popup", async () => {
|
||||
await cdp.send("Target.activateTarget", {targetId: this.targetId});
|
||||
});
|
||||
// attempt to keep everything to initial tab if headless
|
||||
if (this.params.headless) {
|
||||
cdp.send("Page.enable");
|
||||
|
||||
cdp.on("Page.windowOpen", async (resp) => {
|
||||
if (resp.url) {
|
||||
await cdp.send("Target.activateTarget", {targetId: this.targetId});
|
||||
await page.goto(resp.url);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
this.shutdownWait = params.shutdownWait * 1000;
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "0.10.0-beta.3",
|
||||
"version": "0.10.0-beta.4",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
@ -17,7 +17,7 @@
|
|||
"ioredis": "^4.27.1",
|
||||
"js-yaml": "^4.1.0",
|
||||
"minio": "7.0.26",
|
||||
"puppeteer-core": "^19.11.1",
|
||||
"puppeteer-core": "^20.2.1",
|
||||
"sharp": "^0.32.1",
|
||||
"sitemapper": "^3.1.2",
|
||||
"uuid": "8.3.2",
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
pywb>=2.7.3
|
||||
pywb>=2.7.4
|
||||
uwsgi
|
||||
wacz>=0.4.8
|
||||
requests[socks]
|
||||
|
|
|
@ -34,17 +34,25 @@ export class BaseBrowser
|
|||
|
||||
const args = this.chromeArgs(chromeOptions);
|
||||
|
||||
let defaultViewport = null;
|
||||
|
||||
if (process.env.GEOMETRY) {
|
||||
const geom = process.env.GEOMETRY.split("x");
|
||||
|
||||
defaultViewport = {width: Number(geom[0]), height: Number(geom[1])};
|
||||
}
|
||||
|
||||
const launchOpts = {
|
||||
args,
|
||||
headless,
|
||||
headless: headless ? "new" : false,
|
||||
executablePath: this.getBrowserExe(),
|
||||
ignoreDefaultArgs: ["--enable-automation"],
|
||||
ignoreDefaultArgs: ["--enable-automation", "--hide-scrollbars"],
|
||||
ignoreHTTPSErrors: true,
|
||||
handleSIGHUP: signals,
|
||||
handleSIGINT: signals,
|
||||
handleSIGTERM: signals,
|
||||
|
||||
defaultViewport: null,
|
||||
defaultViewport,
|
||||
waitForInitialPage: false,
|
||||
userDataDir: this.profileDir
|
||||
};
|
||||
|
@ -114,6 +122,7 @@ export class BaseBrowser
|
|||
"--no-sandbox",
|
||||
"--disable-background-media-suspend",
|
||||
"--remote-debugging-port=9221",
|
||||
"--remote-allow-origins=*",
|
||||
"--autoplay-policy=no-user-gesture-required",
|
||||
"--disable-site-isolation-trials",
|
||||
`--user-agent=${userAgent || this.getDefaultUA()}`,
|
||||
|
|
58
yarn.lock
58
yarn.lock
|
@ -620,13 +620,14 @@
|
|||
resolved "https://registry.yarnpkg.com/@novnc/novnc/-/novnc-1.4.0.tgz#68adae81a741624142b518323441e852c1f34281"
|
||||
integrity sha512-kW6ALMc5BuH08e/ond/I1naYcfjc19JYMN1EdtmgjjjzPGCjW8fMtVM3MwM6q7YLRjPlQ3orEvoKMgSS7RkEVQ==
|
||||
|
||||
"@puppeteer/browsers@0.5.0":
|
||||
version "0.5.0"
|
||||
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-0.5.0.tgz#1a1ee454b84a986b937ca2d93146f25a3fe8b670"
|
||||
integrity sha512-Uw6oB7VvmPRLE4iKsjuOh8zgDabhNX67dzo8U/BB0f9527qx+4eeUs+korU98OhG5C4ubg7ufBgVi63XYwS6TQ==
|
||||
"@puppeteer/browsers@1.3.0":
|
||||
version "1.3.0"
|
||||
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.3.0.tgz#5ad26540ff54e8b8fca8ab50d2da9c60360a21b9"
|
||||
integrity sha512-an3QdbNPkuU6qpxpbssxAbjRLJcF+eP4L8UqIY3+6n0sbaVxw5pz7PiCLy9g32XEZuoamUlV5ZQPnA6FxvkIHA==
|
||||
dependencies:
|
||||
debug "4.3.4"
|
||||
extract-zip "2.0.1"
|
||||
http-proxy-agent "5.0.0"
|
||||
https-proxy-agent "5.0.1"
|
||||
progress "2.0.3"
|
||||
proxy-from-env "1.1.0"
|
||||
|
@ -665,6 +666,11 @@
|
|||
dependencies:
|
||||
defer-to-connect "^2.0.0"
|
||||
|
||||
"@tootallnate/once@2":
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-2.0.0.tgz#f544a148d3ab35801c1f633a7441fd87c2e484bf"
|
||||
integrity sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==
|
||||
|
||||
"@types/babel__core@^7.1.14":
|
||||
version "7.1.19"
|
||||
resolved "https://registry.yarnpkg.com/@types/babel__core/-/babel__core-7.1.19.tgz#7b497495b7d1b4812bdb9d02804d0576f43ee460"
|
||||
|
@ -1234,10 +1240,10 @@ chownr@^1.1.1:
|
|||
resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
|
||||
integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
|
||||
|
||||
chromium-bidi@0.4.7:
|
||||
version "0.4.7"
|
||||
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.7.tgz#4c022c2b0fb1d1c9b571fadf373042160e71d236"
|
||||
integrity sha512-6+mJuFXwTMU6I3vYLs6IL8A1DyQTPjCfIL971X0aMPVGRbGnNfl6i6Cl0NMbxi2bRYLGESt9T2ZIMRM5PAEcIQ==
|
||||
chromium-bidi@0.4.9:
|
||||
version "0.4.9"
|
||||
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.9.tgz#a1c6d7497e2b8ae3d639fd69dacb25025fa0a696"
|
||||
integrity sha512-u3DC6XwgLCA9QJ5ak1voPslCmacQdulZNCPsI3qNXxSnEcZS7DFIbww+5RM2bznMEje7cc0oydavRLRvOIZtHw==
|
||||
dependencies:
|
||||
mitt "3.0.0"
|
||||
|
||||
|
@ -1510,10 +1516,10 @@ detect-newline@^3.0.0:
|
|||
resolved "https://registry.yarnpkg.com/detect-newline/-/detect-newline-3.1.0.tgz#576f5dfc63ae1a192ff192d8ad3af6308991b651"
|
||||
integrity sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==
|
||||
|
||||
devtools-protocol@0.0.1107588:
|
||||
version "0.0.1107588"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1107588.tgz#f8cac707840b97cc30b029359341bcbbb0ad8ffa"
|
||||
integrity sha512-yIR+pG9x65Xko7bErCUSQaDLrO/P1p3JUzEk7JCU4DowPcGHkTGUGQapcfcLc4qj0UaALwZ+cr0riFgiqpixcg==
|
||||
devtools-protocol@0.0.1120988:
|
||||
version "0.0.1120988"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1120988.tgz#8fe49088919ae3b8df7235774633763f1f925066"
|
||||
integrity sha512-39fCpE3Z78IaIPChJsP6Lhmkbf4dWXOmzLk/KFTdRkNk/0JymRIfUynDVRndV9HoDz8PyalK1UH21ST/ivwW5Q==
|
||||
|
||||
diff-sequences@^29.2.0:
|
||||
version "29.2.0"
|
||||
|
@ -2148,6 +2154,15 @@ http-cache-semantics@^4.0.0:
|
|||
resolved "https://registry.yarnpkg.com/http-cache-semantics/-/http-cache-semantics-4.1.0.tgz#49e91c5cbf36c9b94bcfcd71c23d5249ec74e390"
|
||||
integrity sha512-carPklcUh7ROWRK7Cv27RPtdhYhUsela/ue5/jKzjegVvXDqM2ILE9Q2BGn9JZJh1g87cp56su/FgQSzcWS8cQ==
|
||||
|
||||
http-proxy-agent@5.0.0:
|
||||
version "5.0.0"
|
||||
resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-5.0.0.tgz#5129800203520d434f142bc78ff3c170800f2b43"
|
||||
integrity sha512-n2hY8YdoRE1i7r6M0w9DIw5GgZN0G25P8zLCRQ8rjXtTU3vsNFBI/vWK/UIeE6g5MUUz6avwAPXmL6Fy9D/90w==
|
||||
dependencies:
|
||||
"@tootallnate/once" "2"
|
||||
agent-base "6"
|
||||
debug "4"
|
||||
|
||||
http2-wrapper@^1.0.0-beta.5.2:
|
||||
version "1.0.3"
|
||||
resolved "https://registry.yarnpkg.com/http2-wrapper/-/http2-wrapper-1.0.3.tgz#b8f55e0c1f25d4ebd08b3b0c2c079f9590800b3d"
|
||||
|
@ -3495,21 +3510,16 @@ punycode@^2.1.0:
|
|||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
|
||||
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
|
||||
|
||||
puppeteer-core@^19.11.1:
|
||||
version "19.11.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-19.11.1.tgz#4c63d7a0a6cd268ff054ebcac315b646eee32667"
|
||||
integrity sha512-qcuC2Uf0Fwdj9wNtaTZ2OvYRraXpAK+puwwVW8ofOhOgLPZyz1c68tsorfIZyCUOpyBisjr+xByu7BMbEYMepA==
|
||||
puppeteer-core@^20.2.1:
|
||||
version "20.2.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.2.1.tgz#e920e06e76ef2d1a1d98ea847611a8d662d3cd97"
|
||||
integrity sha512-HmNMcL+g9ght0nCzS5BmEvom2IVhp1/xhlQ9O+fmihQx0EYwYUqgCQhs8Hbv7IapiqKhahKCC8PKfCNTXXsKbQ==
|
||||
dependencies:
|
||||
"@puppeteer/browsers" "0.5.0"
|
||||
chromium-bidi "0.4.7"
|
||||
"@puppeteer/browsers" "1.3.0"
|
||||
chromium-bidi "0.4.9"
|
||||
cross-fetch "3.1.5"
|
||||
debug "4.3.4"
|
||||
devtools-protocol "0.0.1107588"
|
||||
extract-zip "2.0.1"
|
||||
https-proxy-agent "5.0.1"
|
||||
proxy-from-env "1.1.0"
|
||||
tar-fs "2.1.1"
|
||||
unbzip2-stream "1.4.3"
|
||||
devtools-protocol "0.0.1120988"
|
||||
ws "8.13.0"
|
||||
|
||||
querystring@0.2.0:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue