crash page stuck in a JS dialog loop so the crawl can continue (#929)

- if a page is stuck in a window.alert / window.prompt loop, showing more
than 10 consecutive dialogs (unrelated to unloading), call Page.crash()
to move on to the next page more quickly, as not much else can be done.
- add exception handling in dialog accept/dismiss to avoid crawler crash
- fixes #926
This commit is contained in:
Ilya Kreymer 2025-12-01 16:57:00 -08:00 committed by GitHub
parent 8e44b31b45
commit 59df6bbd3f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 39 additions and 16 deletions

View file

@ -47,6 +47,7 @@ import {
ExitCodes,
InterruptReason,
BxFunctionBindings,
MAX_JS_DIALOG_PER_PAGE,
} from "./util/constants.js";
import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
@ -874,30 +875,49 @@ self.__bx_behaviors.selectMainBehavior();
await this.browser.addInitScript(page, initScript);
}
let dialogCount = 0;
// Handle JS dialogs:
// - Ensure off-page navigation is canceled while behavior is running
// - dismiss all other (non-beforeunload) dialogs
page.on("dialog", async (dialog) => {
let accepted = true;
if (dialog.type() === "beforeunload") {
if (opts.pageBlockUnload) {
accepted = false;
await dialog.dismiss();
let msg = {};
try {
if (dialog.type() === "beforeunload") {
if (opts.pageBlockUnload) {
accepted = false;
}
} else {
await dialog.accept();
// other JS dialog, just dismiss
accepted = false;
if (dialogCount >= MAX_JS_DIALOG_PER_PAGE) {
// dialog likely in a loop, need to crash page to avoid being stuck
logger.error(
"JS Dialog appears to be in a loop, crashing page to continue",
);
await cdp.send("Page.crash");
return;
}
dialogCount++;
}
} else {
// other JS dialog, just dismiss
await dialog.dismiss();
msg = {
accepted,
blockingUnload: opts.pageBlockUnload,
message: dialog.message(),
type: dialog.type(),
page: page.url(),
workerid,
};
if (accepted) {
await dialog.accept();
} else {
await dialog.dismiss();
}
logger.debug("JS Dialog", msg);
} catch (e) {
logger.warn("JS Dialog Error", { ...msg, ...formatErr(e) });
}
logger.debug("JS Dialog", {
accepted,
blockingUnload: opts.pageBlockUnload,
message: dialog.message(),
type: dialog.type(),
page: page.url(),
workerid,
});
});
// only add if running with autoclick behavior

View file

@ -43,6 +43,9 @@ export const SITEMAP_INITIAL_FETCH_TIMEOUT_SECS = 30;
export const ROBOTS_CACHE_LIMIT = 100;
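// max non-beforeunload JS dialogs (e.g. alert/prompt) to dismiss per page;
// once this limit is reached, the page is crashed to break out of a dialog loop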
export const MAX_JS_DIALOG_PER_PAGE = 10;
export type ExtractSelector = {
selector: string;
extract: string;