mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Add timeout to final awaitPendingClear() (#442)
Ensure the final pending wait also has a timeout, set to the max page timeout multiplied by the number of workers. It could be set higher, but it needs to have some timeout, e.g. in case a live stream that never terminates is being downloaded. Fixes #348 in the 0.12.x line. Also bumps the version to 0.12.3.
This commit is contained in:
parent
540c355d25
commit
c3b98e5047
2 changed files with 9 additions and 3 deletions
10
crawler.js
10
crawler.js
|
@ -898,8 +898,14 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
|
||||
await this.writeStats();
|
||||
|
||||
// extra wait for all resources to land into WARCs
|
||||
await this.awaitPendingClear();
|
||||
// extra wait with timeout for all resources to land into WARCs
|
||||
await timedRun(
|
||||
this.awaitPendingClear(),
|
||||
this.maxPageTime * this.params.workers,
|
||||
"Waiting for pending resources timed out",
|
||||
{timeout: this.maxPageTime * this.params.workers},
|
||||
"general"
|
||||
);
|
||||
|
||||
// if crawl has been stopped, mark as final exit for post-crawl tasks
|
||||
if (await this.crawlState.isCrawlStopped()) {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "0.12.2",
|
||||
"version": "0.12.3",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue