netIdle cleanup + better default for pages where networkIdle times out (#916)

- set default netIdleWait to 2 seconds
- add netIdleMaxRequests as an option, defaulting to 1 (in case of
long-running requests)
- further fix for #913 
- avoid accidental logging

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
Ilya Kreymer 2025-11-18 16:34:02 -08:00 committed by GitHub
parent 8c8fd6be08
commit 87edef3362
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 26 additions and 18 deletions

View file

@ -221,11 +221,13 @@ Options:
interrupted, don't run post-crawl pr interrupted, don't run post-crawl pr
ocesses on interrupt ocesses on interrupt
[boolean] [default: false] [boolean] [default: false]
--netIdleWait if set, wait for network idle after --netIdleWait number of seconds to wait for networ
page load and after behaviors are do k idle after page load and after beh
ne (in seconds). if -1 (default), de aviors are done (default: 2)
termine based on scope [number] [default: 2]
[number] [default: -1] --netIdleMaxRequests max active requests allowed for netw
ork to be considered idle
[default: 1]
--lang if set, sets the language used by th --lang if set, sets the language used by th
e browser, should be ISO 639 languag e browser, should be ISO 639 languag
e[-country] code [string] e[-country] code [string]
@ -318,6 +320,10 @@ Options:
--sshProxyKnownHostsFile path to SSH known hosts file for SOC --sshProxyKnownHostsFile path to SSH known hosts file for SOC
KS5 over SSH proxy connection KS5 over SSH proxy connection
[string] [string]
--extraChromeArgs Extra arguments to pass directly to
the Chrome instance (space-separated
or multiple --extraChromeArgs)
[array] [default: []]
--config Path to YAML config file --config Path to YAML config file
``` ```

View file

@ -2329,6 +2329,7 @@ self.__bx_behaviors.selectMainBehavior();
try { try {
await this.browser.waitForNetworkIdle(page, { await this.browser.waitForNetworkIdle(page, {
timeout: this.params.netIdleWait * 1000, timeout: this.params.netIdleWait * 1000,
concurrency: this.params.netIdleMaxRequests,
}); });
} catch (e) { } catch (e) {
logger.debug("waitForNetworkIdle timed out, ignoring", details); logger.debug("waitForNetworkIdle timed out, ignoring", details);

View file

@ -516,9 +516,15 @@ class ArgParser {
netIdleWait: { netIdleWait: {
describe: describe:
"if set, wait for network idle after page load and after behaviors are done (in seconds). if -1 (default), determine based on scope", "number of seconds to wait for network idle after page load and after behaviors are done (default: 2)",
type: "number", type: "number",
default: -1, default: 2,
},
netIdleMaxRequests: {
describe:
"max active requests allowed for network to be considered idle",
default: 1,
}, },
lang: { lang: {
@ -837,15 +843,6 @@ class ArgParser {
argv.selectLinks = selectLinks; argv.selectLinks = selectLinks;
if (argv.netIdleWait === -1) {
if (argv.scopeType === "page" || argv.scopeType === "page-spa") {
argv.netIdleWait = 15;
} else {
argv.netIdleWait = 2;
}
//logger.debug(`Set netIdleWait to ${argv.netIdleWait} seconds`);
}
if (isQA && !argv.qaSource) { if (isQA && !argv.qaSource) {
logger.fatal("--qaSource required for QA mode"); logger.fatal("--qaSource required for QA mode");
} }

View file

@ -23,8 +23,11 @@ import puppeteer, {
LaunchOptions, LaunchOptions,
Viewport, Viewport,
CookieData, CookieData,
WaitForNetworkIdleOptions,
CDPSession,
Target,
Browser as PptrBrowser,
} from "puppeteer-core"; } from "puppeteer-core";
import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
import { Recorder } from "./recorder.js"; import { Recorder } from "./recorder.js";
import { timedRun } from "./timing.js"; import { timedRun } from "./timing.js";
import assert from "node:assert"; import assert from "node:assert";
@ -244,6 +247,7 @@ export class Browser {
try { try {
child_process.execSync("rm ./Singleton*", { child_process.execSync("rm ./Singleton*", {
cwd: this.profileDir, cwd: this.profileDir,
stdio: "ignore",
}); });
} catch (e) { } catch (e) {
// ignore // ignore
@ -700,7 +704,7 @@ export class Browser {
page.on("request", callback); page.on("request", callback);
} }
async waitForNetworkIdle(page: Page, params: { timeout?: number }) { async waitForNetworkIdle(page: Page, params: WaitForNetworkIdleOptions) {
return await page.waitForNetworkIdle(params); return await page.waitForNetworkIdle(params);
} }