From 59df6bbd3f5e3a56291f2b19ca528fbb3ecf884e Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Mon, 1 Dec 2025 16:57:00 -0800 Subject: [PATCH] crash page on prompt dialog loop to continue: (#929) - if a page is stuck in a window.alert / window.prompt loop, showing more than 10 consecutive dialogs (unrelated to unloading), call Page.crash() to more quickly move on to next page, as not much else can be done. - add exception handling in dialog accept/dismiss to avoid crawler crash - fixes #926 --- src/crawler.ts | 52 ++++++++++++++++++++++++++++++------------- src/util/constants.ts | 3 +++ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/crawler.ts b/src/crawler.ts index d822791b..9c0cdde5 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -47,6 +47,7 @@ import { ExitCodes, InterruptReason, BxFunctionBindings, + MAX_JS_DIALOG_PER_PAGE, } from "./util/constants.js"; import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js"; @@ -874,30 +875,49 @@ self.__bx_behaviors.selectMainBehavior(); await this.browser.addInitScript(page, initScript); } + let dialogCount = 0; + // Handle JS dialogs: // - Ensure off-page navigation is canceled while behavior is running // - dismiss close all other dialogs if not blocking unload page.on("dialog", async (dialog) => { let accepted = true; - if (dialog.type() === "beforeunload") { - if (opts.pageBlockUnload) { - accepted = false; - await dialog.dismiss(); + let msg = {}; + try { + if (dialog.type() === "beforeunload") { + if (opts.pageBlockUnload) { + accepted = false; + } } else { - await dialog.accept(); + // other JS dialog, just dismiss + accepted = false; + if (dialogCount >= MAX_JS_DIALOG_PER_PAGE) { + // dialog likely in a loop, need to crash page to avoid being stuck + logger.error( + "JS Dialog appears to be in a loop, crashing page to continue", + ); + await cdp.send("Page.crash"); + return; + } + dialogCount++; } - } else { - // other JS dialog, just dismiss - await dialog.dismiss(); + 
msg = { + accepted, + blockingUnload: opts.pageBlockUnload, + message: dialog.message(), + type: dialog.type(), + page: page.url(), + workerid, + }; + if (accepted) { + await dialog.accept(); + } else { + await dialog.dismiss(); + } + logger.debug("JS Dialog", msg); + } catch (e) { + logger.warn("JS Dialog Error", { ...msg, ...formatErr(e) }); } - logger.debug("JS Dialog", { - accepted, - blockingUnload: opts.pageBlockUnload, - message: dialog.message(), - type: dialog.type(), - page: page.url(), - workerid, - }); }); // only add if running with autoclick behavior diff --git a/src/util/constants.ts b/src/util/constants.ts index ebf83c57..fa8232ce 100644 --- a/src/util/constants.ts +++ b/src/util/constants.ts @@ -43,6 +43,9 @@ export const SITEMAP_INITIAL_FETCH_TIMEOUT_SECS = 30; export const ROBOTS_CACHE_LIMIT = 100; +// max JS dialogs (alert/prompt) to allow per page +export const MAX_JS_DIALOG_PER_PAGE = 10; + export type ExtractSelector = { selector: string; extract: string;