mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-07 13:49:47 +00:00
netIdle cleanup + better default for pages where networkIdle times out (#916)
- set default networkIdle to 2
- add netIdleMaxRequests as an option, default to 1 (in case of long-running requests)
- further fix for #913
- avoid accidental logging

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
8c8fd6be08
commit
87edef3362
4 changed files with 26 additions and 18 deletions
|
|
@ -221,11 +221,13 @@ Options:
|
|||
interrupted, don't run post-crawl pr
|
||||
ocesses on interrupt
|
||||
[boolean] [default: false]
|
||||
--netIdleWait if set, wait for network idle after
|
||||
page load and after behaviors are do
|
||||
ne (in seconds). if -1 (default), de
|
||||
termine based on scope
|
||||
[number] [default: -1]
|
||||
--netIdleWait number of seconds to wait for networ
|
||||
k idle after page load and after beh
|
||||
aviors are done (default: 2)
|
||||
[number] [default: 2]
|
||||
--netIdleMaxRequests max active requests allowed for netw
|
||||
ork to be considered idle
|
||||
[default: 1]
|
||||
--lang if set, sets the language used by th
|
||||
e browser, should be ISO 639 languag
|
||||
e[-country] code [string]
|
||||
|
|
@ -318,6 +320,10 @@ Options:
|
|||
--sshProxyKnownHostsFile path to SSH known hosts file for SOC
|
||||
KS5 over SSH proxy connection
|
||||
[string]
|
||||
--extraChromeArgs Extra arguments to pass directly to
|
||||
the Chrome instance (space-separated
|
||||
or multiple --extraChromeArgs)
|
||||
[array] [default: []]
|
||||
--config Path to YAML config file
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -2329,6 +2329,7 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
try {
|
||||
await this.browser.waitForNetworkIdle(page, {
|
||||
timeout: this.params.netIdleWait * 1000,
|
||||
concurrency: this.params.netIdleMaxRequests,
|
||||
});
|
||||
} catch (e) {
|
||||
logger.debug("waitForNetworkIdle timed out, ignoring", details);
|
||||
|
|
|
|||
|
|
@ -516,9 +516,15 @@ class ArgParser {
|
|||
|
||||
netIdleWait: {
|
||||
describe:
|
||||
"if set, wait for network idle after page load and after behaviors are done (in seconds). if -1 (default), determine based on scope",
|
||||
"number of seconds to wait for network idle after page load and after behaviors are done (default: 2)",
|
||||
type: "number",
|
||||
default: -1,
|
||||
default: 2,
|
||||
},
|
||||
|
||||
netIdleMaxRequests: {
|
||||
describe:
|
||||
"max active requests allowed for network to be considered idle",
|
||||
default: 1,
|
||||
},
|
||||
|
||||
lang: {
|
||||
|
|
@ -837,15 +843,6 @@ class ArgParser {
|
|||
|
||||
argv.selectLinks = selectLinks;
|
||||
|
||||
if (argv.netIdleWait === -1) {
|
||||
if (argv.scopeType === "page" || argv.scopeType === "page-spa") {
|
||||
argv.netIdleWait = 15;
|
||||
} else {
|
||||
argv.netIdleWait = 2;
|
||||
}
|
||||
//logger.debug(`Set netIdleWait to ${argv.netIdleWait} seconds`);
|
||||
}
|
||||
|
||||
if (isQA && !argv.qaSource) {
|
||||
logger.fatal("--qaSource required for QA mode");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,8 +23,11 @@ import puppeteer, {
|
|||
LaunchOptions,
|
||||
Viewport,
|
||||
CookieData,
|
||||
WaitForNetworkIdleOptions,
|
||||
CDPSession,
|
||||
Target,
|
||||
Browser as PptrBrowser,
|
||||
} from "puppeteer-core";
|
||||
import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
|
||||
import { Recorder } from "./recorder.js";
|
||||
import { timedRun } from "./timing.js";
|
||||
import assert from "node:assert";
|
||||
|
|
@ -244,6 +247,7 @@ export class Browser {
|
|||
try {
|
||||
child_process.execSync("rm ./Singleton*", {
|
||||
cwd: this.profileDir,
|
||||
stdio: "ignore",
|
||||
});
|
||||
} catch (e) {
|
||||
// ignore
|
||||
|
|
@ -700,7 +704,7 @@ export class Browser {
|
|||
page.on("request", callback);
|
||||
}
|
||||
|
||||
async waitForNetworkIdle(page: Page, params: { timeout?: number }) {
|
||||
async waitForNetworkIdle(page: Page, params: WaitForNetworkIdleOptions) {
|
||||
return await page.waitForNetworkIdle(params);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue