Link extraction promise cleanup (#701)

- catch rejections from frame.evaluate() directly and log the error there, to
avoid any possibility of an exception propagating before the promise is wrapped in timedRun()
- also add clearTimeout() to timedRun(), so the timeout timer is cleared once the race settles
- possibly fixes openzim/zimit#376
This commit is contained in:
Ilya Kreymer 2024-10-11 00:11:24 -07:00 committed by GitHub
parent 157ac34d8c
commit 652cf9cfa6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 24 additions and 24 deletions

View file

@ -2048,33 +2048,31 @@ self.__bx_behaviors.selectMainBehavior();
extract = "href",
isAttribute = false,
} of selectors) {
const promiseResults = await Promise.allSettled(
frames.map((frame) =>
timedRun(
frame.evaluate(loadLinks, {
await Promise.allSettled(
frames.map((frame) => {
const getLinks = frame
.evaluate(loadLinks, {
selector,
extract,
isAttribute,
addLinkFunc: ADD_LINK_FUNC,
}),
})
.catch((e) =>
logger.warn("Link Extraction failed in frame", {
frameUrl: frame.url,
...logDetails,
...formatErr(e),
}),
);
return timedRun(
getLinks,
PAGE_OP_TIMEOUT_SECS,
"Link extraction timed out",
logDetails,
),
),
);
}),
);
for (let i = 0; i < promiseResults.length; i++) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const { status, reason } = promiseResults[i] as any;
if (status === "rejected") {
logger.warn("Link Extraction failed in frame", {
reason,
frameUrl: frames[i].url,
...logDetails,
});
}
}
}
} catch (e) {
logger.warn("Link Extraction failed", e, "links");

View file

@ -18,14 +18,16 @@ export function timedRun(
// return Promise return value or log error if timeout is reached first
const timeout = seconds * 1000;
let tm: NodeJS.Timeout;
const rejectPromiseOnTimeout = (timeout: number) => {
return new Promise((resolve, reject) => {
setTimeout(() => reject("timeout reached"), timeout);
tm = setTimeout(() => reject("timeout reached"), timeout);
});
};
return Promise.race([promise, rejectPromiseOnTimeout(timeout)]).catch(
(err) => {
return Promise.race([promise, rejectPromiseOnTimeout(timeout)])
.catch((err) => {
if (err === "timeout reached") {
const logFunc = isWarn ? logger.warn : logger.error;
logFunc.call(
@ -38,8 +40,8 @@ export function timedRun(
//logger.error("Unknown exception", {...errJSON(err), ...logDetails}, context);
throw err;
}
},
);
})
.finally(() => clearTimeout(tm));
}
export function secondsElapsed(startTime: number, nowDate: Date | null = null) {