diff --git a/src/crawler.ts b/src/crawler.ts index 713f591e..d9bce83d 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -2048,33 +2048,31 @@ self.__bx_behaviors.selectMainBehavior(); extract = "href", isAttribute = false, } of selectors) { - const promiseResults = await Promise.allSettled( - frames.map((frame) => - timedRun( - frame.evaluate(loadLinks, { + await Promise.allSettled( + frames.map((frame) => { + const getLinks = frame + .evaluate(loadLinks, { selector, extract, isAttribute, addLinkFunc: ADD_LINK_FUNC, - }), + }) + .catch((e) => + logger.warn("Link Extraction failed in frame", { + frameUrl: frame.url, + ...logDetails, + ...formatErr(e), + }), + ); + + return timedRun( + getLinks, PAGE_OP_TIMEOUT_SECS, "Link extraction timed out", logDetails, - ), - ), + ); + }), ); - - for (let i = 0; i < promiseResults.length; i++) { - // eslint-disable-next-line @typescript-eslint/no-explicit-any - const { status, reason } = promiseResults[i] as any; - if (status === "rejected") { - logger.warn("Link Extraction failed in frame", { - reason, - frameUrl: frames[i].url, - ...logDetails, - }); - } - } } } catch (e) { logger.warn("Link Extraction failed", e, "links"); diff --git a/src/util/timing.ts b/src/util/timing.ts index 714345e3..a3c63273 100644 --- a/src/util/timing.ts +++ b/src/util/timing.ts @@ -18,14 +18,16 @@ export function timedRun( // return Promise return value or log error if timeout is reached first const timeout = seconds * 1000; + let tm: NodeJS.Timeout; + const rejectPromiseOnTimeout = (timeout: number) => { return new Promise((resolve, reject) => { - setTimeout(() => reject("timeout reached"), timeout); + tm = setTimeout(() => reject("timeout reached"), timeout); }); }; - return Promise.race([promise, rejectPromiseOnTimeout(timeout)]).catch( - (err) => { + return Promise.race([promise, rejectPromiseOnTimeout(timeout)]) + .catch((err) => { if (err === "timeout reached") { const logFunc = isWarn ? logger.warn : logger.error; logFunc.call( @@ -38,8 +40,8 @@ export function timedRun( //logger.error("Unknown exception", {...errJSON(err), ...logDetails}, context); throw err; } - }, - ); + }) + .finally(() => clearTimeout(tm)); } export function secondsElapsed(startTime: number, nowDate: Date | null = null) {