mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Improved handling of browser stuck / crashed (#763)
- only attempt to close browser if the browser has not crashed - add timeout for browser.close() - ensure browser crash results in healthchecker failure - bump to 1.5.3
This commit is contained in:
parent
5807c320bf
commit
846f0355f6
5 changed files with 24 additions and 6 deletions
|
@ -1,4 +1,4 @@
|
|||
ARG BROWSER_VERSION=1.74.51
|
||||
ARG BROWSER_VERSION=1.75.175
|
||||
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:brave-${BROWSER_VERSION}
|
||||
|
||||
FROM ${BROWSER_IMAGE_BASE}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "1.5.2",
|
||||
"version": "1.5.3",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
|
@ -1607,13 +1607,12 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
},
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
ondisconnect: (err: any) => {
|
||||
this.interrupted = true;
|
||||
this.markBrowserCrashed();
|
||||
logger.error(
|
||||
"Browser disconnected (crashed?), interrupting crawl",
|
||||
err,
|
||||
"browser",
|
||||
);
|
||||
this.browserCrashed = true;
|
||||
},
|
||||
|
||||
recording: this.recording,
|
||||
|
@ -1753,6 +1752,14 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
}
|
||||
}
|
||||
|
||||
markBrowserCrashed() {
|
||||
this.interrupted = true;
|
||||
this.browserCrashed = true;
|
||||
if (this.healthChecker) {
|
||||
this.healthChecker.browserCrashed = true;
|
||||
}
|
||||
}
|
||||
|
||||
async closeLog(): Promise<void> {
|
||||
// close file-based log
|
||||
logger.setExternalLogStream(null);
|
||||
|
|
|
@ -7,6 +7,7 @@ export class HealthChecker {
|
|||
port: number;
|
||||
errorThreshold: number;
|
||||
healthServer: http.Server;
|
||||
browserCrashed = false;
|
||||
|
||||
updater: (() => Promise<void>) | null;
|
||||
|
||||
|
@ -33,7 +34,7 @@ export class HealthChecker {
|
|||
const pathname = req.url ? url.parse(req.url).pathname : "";
|
||||
switch (pathname) {
|
||||
case "/healthz":
|
||||
if (this.errorCount < this.errorThreshold) {
|
||||
if (this.errorCount < this.errorThreshold && !this.browserCrashed) {
|
||||
logger.debug(
|
||||
`health check ok, num errors ${this.errorCount} < ${this.errorThreshold}`,
|
||||
{},
|
||||
|
|
|
@ -7,6 +7,7 @@ import { rxEscape } from "./seeds.js";
|
|||
import { CDPSession, Page } from "puppeteer-core";
|
||||
import { PageState, WorkerId } from "./state.js";
|
||||
import { Crawler } from "../crawler.js";
|
||||
import { PAGE_OP_TIMEOUT_SECS } from "./constants.js";
|
||||
|
||||
const MAX_REUSE = 5;
|
||||
|
||||
|
@ -432,7 +433,16 @@ export async function runWorkers(
|
|||
|
||||
await closeWorkers();
|
||||
|
||||
await crawler.browser.close();
|
||||
if (!crawler.browserCrashed) {
|
||||
await timedRun(
|
||||
crawler.browser.close(),
|
||||
PAGE_OP_TIMEOUT_SECS,
|
||||
"Closing Browser Timed Out",
|
||||
{},
|
||||
"worker",
|
||||
true,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ===========================================================================
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue