Improved handling of browser stuck / crashed (#763)

- only attempt to close browser if the browser has not crashed
- add timeout for browser.close()
- ensure browser crash results in healthchecker failure
- bump to 1.5.3
This commit is contained in:
Ilya Kreymer 2025-02-10 10:16:25 -08:00 committed by GitHub
parent 5807c320bf
commit 846f0355f6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 24 additions and 6 deletions

View file

@ -1,4 +1,4 @@
ARG BROWSER_VERSION=1.74.51
ARG BROWSER_VERSION=1.75.175
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:brave-${BROWSER_VERSION}
FROM ${BROWSER_IMAGE_BASE}

View file

@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "1.5.2",
"version": "1.5.3",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",

View file

@ -1607,13 +1607,12 @@ self.__bx_behaviors.selectMainBehavior();
},
// eslint-disable-next-line @typescript-eslint/no-explicit-any
ondisconnect: (err: any) => {
this.interrupted = true;
this.markBrowserCrashed();
logger.error(
"Browser disconnected (crashed?), interrupting crawl",
err,
"browser",
);
this.browserCrashed = true;
},
recording: this.recording,
@ -1753,6 +1752,14 @@ self.__bx_behaviors.selectMainBehavior();
}
}
/**
 * Record that the browser has crashed.
 *
 * Sets `interrupted` and `browserCrashed` on this object and, when a
 * health checker is attached, sets its `browserCrashed` flag too.
 */
markBrowserCrashed() {
  this.interrupted = true;
  this.browserCrashed = true;

  // The health checker may be absent; nothing further to flag in that case.
  const checker = this.healthChecker;
  if (!checker) {
    return;
  }
  checker.browserCrashed = true;
}
async closeLog(): Promise<void> {
// close file-based log
logger.setExternalLogStream(null);

View file

@ -7,6 +7,7 @@ export class HealthChecker {
port: number;
errorThreshold: number;
healthServer: http.Server;
browserCrashed = false;
updater: (() => Promise<void>) | null;
@ -33,7 +34,7 @@ export class HealthChecker {
const pathname = req.url ? url.parse(req.url).pathname : "";
switch (pathname) {
case "/healthz":
if (this.errorCount < this.errorThreshold) {
if (this.errorCount < this.errorThreshold && !this.browserCrashed) {
logger.debug(
`health check ok, num errors ${this.errorCount} < ${this.errorThreshold}`,
{},

View file

@ -7,6 +7,7 @@ import { rxEscape } from "./seeds.js";
import { CDPSession, Page } from "puppeteer-core";
import { PageState, WorkerId } from "./state.js";
import { Crawler } from "../crawler.js";
import { PAGE_OP_TIMEOUT_SECS } from "./constants.js";
const MAX_REUSE = 5;
@ -432,7 +433,16 @@ export async function runWorkers(
await closeWorkers();
await crawler.browser.close();
if (!crawler.browserCrashed) {
await timedRun(
crawler.browser.close(),
PAGE_OP_TIMEOUT_SECS,
"Closing Browser Timed Out",
{},
"worker",
true,
);
}
}
// ===========================================================================