diff --git a/package.json b/package.json index 4e1fb750..f7cd6003 100644 --- a/package.json +++ b/package.json @@ -18,7 +18,7 @@ "dependencies": { "@novnc/novnc": "^1.4.0", "@webrecorder/wabac": "^2.16.12", - "browsertrix-behaviors": "^0.5.2", + "browsertrix-behaviors": "^0.5.3", "crc": "^4.3.2", "get-folder-size": "^4.0.0", "husky": "^8.0.3", diff --git a/src/crawler.ts b/src/crawler.ts index 5bd2925f..7152f66b 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -1722,17 +1722,8 @@ self.__bx_behaviors.selectMainBehavior(); ) { const { seedId, depth, extraHops = 0, filteredFrames, callbacks } = data; - let links: string[] = []; - const promiseList = []; - - callbacks.addLink = (url: string) => { - links.push(url); - if (links.length == 500) { - promiseList.push( - this.queueInScopeUrls(seedId, links, depth, extraHops, logDetails), - ); - links = []; - } + callbacks.addLink = async (url: string) => { + await this.queueInScopeUrls(seedId, [url], depth, extraHops, logDetails); }; const loadLinks = (options: { @@ -1801,14 +1792,6 @@ self.__bx_behaviors.selectMainBehavior(); } catch (e) { logger.warn("Link Extraction failed", e, "links"); } - - if (links.length) { - promiseList.push( - this.queueInScopeUrls(seedId, links, depth, extraHops, logDetails), - ); - } - - await Promise.allSettled(promiseList); } async queueInScopeUrls( diff --git a/src/util/state.ts b/src/util/state.ts index c28fa814..cfbdc573 100644 --- a/src/util/state.ts +++ b/src/util/state.ts @@ -36,7 +36,7 @@ export type QueueEntry = { // ============================================================================ export type PageCallbacks = { - addLink?: (url: string) => void; + addLink?: (url: string) => Promise<void>; }; // ============================================================================ diff --git a/yarn.lock b/yarn.lock index bfe9d200..a414d860 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1425,10 +1425,10 @@ browserslist@^4.21.3: node-releases "^2.0.6" update-browserslist-db "^1.0.9" 
-browsertrix-behaviors@^0.5.2: - version "0.5.2" - resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.5.2.tgz#d2fe1d6ff08815ff0dd68a05fe1a3cdc4bbec8ca" - integrity sha512-8nhpnzY8OM1mxQ+mZ+m10dpGgMuhCnKUV5YUlitDpMyEfKlEybUmTz5sroVQH8e//NcJox7W6QYjaU2Y/ygxww== +browsertrix-behaviors@^0.5.3: + version "0.5.3" + resolved "https://registry.yarnpkg.com/browsertrix-behaviors/-/browsertrix-behaviors-0.5.3.tgz#f987075790b0fd970814f57195e8525277ddd2a0" + integrity sha512-NiVdV42xvj4DvX/z0Dxqzqsa+5e57/M7hIyK3fl41BxzOJqCgSMu0MpkrWuKpbRVo+89ZnBmzh2z6D18Vmn1LA== bser@2.1.1: version "2.1.1"