From b42548373d13d9e6b1e5c30601bdb4d61d2499ed Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Fri, 6 Sep 2024 16:24:18 -0700 Subject: [PATCH] eslint: add strict await checking: (#684) - require await / void / catch for promises - don't allow unnecessary await --- .eslintrc.cjs | 4 ++++ src/crawler.ts | 24 ++++++++++++++---------- src/create-login-profile.ts | 8 +++++--- src/main.ts | 4 ++-- src/replaycrawler.ts | 4 ++-- src/util/blockrules.ts | 2 +- src/util/logger.ts | 7 +++++-- src/util/originoverride.ts | 8 ++++---- src/util/proxy.ts | 4 +++- src/util/recorder.ts | 16 ++++++++++------ src/util/screencaster.ts | 20 ++++++++++++-------- src/util/sitemapper.ts | 6 +++--- src/util/state.ts | 4 +++- src/util/warcwriter.ts | 4 ++-- src/util/worker.ts | 2 +- tsconfig.eslint.json | 8 ++++++++ 16 files changed, 79 insertions(+), 46 deletions(-) create mode 100644 tsconfig.eslint.json diff --git a/.eslintrc.cjs b/.eslintrc.cjs index 9f97dfe7..a5d40eee 100644 --- a/.eslintrc.cjs +++ b/.eslintrc.cjs @@ -15,6 +15,8 @@ module.exports = { parserOptions: { ecmaVersion: 12, sourceType: "module", + project: ["./tsconfig.eslint.json"], + tsconfigRootDir: __dirname, }, rules: { "no-constant-condition": ["error", { checkLoops: false }], @@ -27,6 +29,8 @@ module.exports = { allowNamedExports: true, }, ], + "@typescript-eslint/no-floating-promises": "error", + "@typescript-eslint/await-thenable": "error" }, reportUnusedDisableDirectives: true, }; diff --git a/src/crawler.ts b/src/crawler.ts index 81eee6a0..f8c8e659 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -193,7 +193,7 @@ export class Crawler { data: PageState; // eslint-disable-next-line no-use-before-define crawler: Crawler; - }) => NonNullable; + }) => Promise; recording: boolean; @@ -770,7 +770,7 @@ self.__bx_behaviors.selectMainBehavior(); ) { await cdp.send("Runtime.enable"); - await cdp.on( + cdp.on( "Runtime.executionContextCreated", (params: Protocol.Runtime.ExecutionContextCreatedEvent) => { const { id, auxData } = params.context; @@ -780,7 +780,7 @@ self.__bx_behaviors.selectMainBehavior(); }, ); - await cdp.on( + cdp.on( "Runtime.executionContextDestroyed", (params: Protocol.Runtime.ExecutionContextDestroyedEvent) => { const { executionContextId } = params; @@ -793,7 +793,7 @@ self.__bx_behaviors.selectMainBehavior(); }, ); - await cdp.on("Runtime.executionContextsCleared", () => { + cdp.on("Runtime.executionContextsCleared", () => { frameIdToExecId.clear(); }); } @@ -1877,7 +1877,7 @@ self.__bx_behaviors.selectMainBehavior(); } // HTML Pages Only here - const frames = await page.frames(); + const frames = page.frames(); const filteredFrames = await Promise.allSettled( frames.map((frame) => this.shouldIncludeFrame(frame, logDetails)), @@ -2190,7 +2190,7 @@ self.__bx_behaviors.selectMainBehavior(); } else { logger.debug("Text Extraction: None"); } - await fh.write(JSON.stringify(header) + "\n"); + fh.write(JSON.stringify(header) + "\n"); } } catch (err) { logger.error(`"${filename}" creation failed`, err); @@ -2271,7 +2271,7 @@ self.__bx_behaviors.selectMainBehavior(); } try { - await pagesFH.write(processedRow); + pagesFH.write(processedRow); } catch (err) { logger.warn( "Page append failed", @@ -2336,7 +2336,9 @@ self.__bx_behaviors.selectMainBehavior(); { urlsFound: sitemapper.count, limitHit: sitemapper.atLimit() }, "sitemap", ); - this.crawlState.markSitemapDone(); + this.crawlState + .markSitemapDone() + .catch((e) => logger.warn("Error marking sitemap done", e)); finished = true; } }); @@ -2354,7 +2356,9 @@ self.__bx_behaviors.selectMainBehavior(); "sitemap", ); } - this.queueInScopeUrls(seedId, [url], 0, 0, true); + this.queueInScopeUrls(seedId, [url], 0, 0, true).catch((e) => + logger.warn("Error queuing urls", e, "links"), + ); if (count >= 100 && !resolved) { logger.info( "Sitemap partially parsed, continue parsing large sitemap in the background", @@ -2458,7 +2462,7 @@ self.__bx_behaviors.selectMainBehavior(); } if (fh) { - await fh.end(); + fh.end(); } logger.debug(`Combined WARCs saved as: ${generatedCombinedWarcs}`); diff --git a/src/create-login-profile.ts b/src/create-login-profile.ts index 0db05eab..1e217c6e 100755 --- a/src/create-login-profile.ts +++ b/src/create-login-profile.ts @@ -437,7 +437,7 @@ class InteractiveBrowser { // attempt to keep everything to initial tab if headless if (this.params.headless) { - cdp.send("Page.enable"); + cdp.send("Page.enable").catch((e) => logger.warn("Page.enable error", e)); cdp.on("Page.windowOpen", async (resp) => { if (!resp.url) { @@ -493,7 +493,9 @@ class InteractiveBrowser { handlePageLoad() { this.addOrigin(); - this.saveCookiesFor(this.page.url()); + this.saveCookiesFor(this.page.url()).catch((e) => + logger.warn("Error saving cookies", e), + ); } async saveAllCookies() { @@ -722,4 +724,4 @@ class InteractiveBrowser { } } -main(); +await main(); diff --git a/src/main.ts b/src/main.ts index 4433c6a9..df8b7a00 100755 --- a/src/main.ts +++ b/src/main.ts @@ -25,7 +25,7 @@ async function handleTerminate(signame: string) { setExitOnRedisError(); try { - crawler.checkCanceled(); + await crawler.checkCanceled(); if (!crawler.interrupted) { logger.info("SIGNAL: gracefully finishing current pages..."); @@ -56,4 +56,4 @@ if (process.argv[1].endsWith("qa")) { crawler = new Crawler(); } -crawler.run(); +await crawler.run(); diff --git a/src/replaycrawler.ts b/src/replaycrawler.ts index b44b44ef..519c9a64 100644 --- a/src/replaycrawler.ts +++ b/src/replaycrawler.ts @@ -330,10 +330,10 @@ export class ReplayCrawler extends Crawler { page.frames().length > 1 ) { const frame = page.frames()[1]; - const timeoutid = setTimeout(() => { + const timeoutid = setTimeout(async () => { logger.warn("Reloading RWP Frame, not inited", { url }, "replay"); try { - frame.evaluate("window.location.reload();"); + await frame.evaluate("window.location.reload();"); } catch (e) { logger.error("RWP Reload failed", e, "replay"); } diff --git a/src/util/blockrules.ts b/src/util/blockrules.ts index 2725af6f..5d3238fb 100644 --- a/src/util/blockrules.ts +++ b/src/util/blockrules.ts @@ -104,7 +104,7 @@ export class BlockRules { ); } }; - await browser.interceptRequest(page, onRequest); + browser.interceptRequest(page, onRequest); } // eslint-disable-next-line @typescript-eslint/no-explicit-any diff --git a/src/util/logger.ts b/src/util/logger.ts index d33a6256..a7d3a415 100644 --- a/src/util/logger.ts +++ b/src/util/logger.ts @@ -153,7 +153,7 @@ class Logger { this.crawlState && toLogToRedis.includes(logLevel) ) { - this.crawlState.logError(string); + this.crawlState.logError(string).catch(() => {}); } } @@ -185,7 +185,10 @@ class Logger { this.logAsJSON(`${message}. Quitting`, data, context, "fatal"); if (this.crawlState) { - this.crawlState.setStatus("failed").finally(process.exit(exitCode)); + this.crawlState + .setStatus("failed") + .catch(() => {}) + .finally(process.exit(exitCode)); } else { process.exit(exitCode); } diff --git a/src/util/originoverride.ts b/src/util/originoverride.ts index b74e9f79..a00a2b54 100644 --- a/src/util/originoverride.ts +++ b/src/util/originoverride.ts @@ -34,7 +34,7 @@ export class OriginOverride { } if (!newUrl || !orig) { - request.continue({}, -1); + await request.continue({}, -1); return; } @@ -57,16 +57,16 @@ export class OriginOverride { "originOverride", ); - request.respond({ body, headers: respHeaders, status }, -1); + await request.respond({ body, headers: respHeaders, status }, -1); } catch (e) { logger.warn( "Error overriding origin", { ...formatErr(e), url: page.url() }, "originOverride", ); - request.continue({}, -1); + await request.continue({}, -1); } }; - await browser.interceptRequest(page, onRequest); + browser.interceptRequest(page, onRequest); } } diff --git a/src/util/proxy.ts b/src/util/proxy.ts index 50282b40..cf6a3437 100644 --- a/src/util/proxy.ts +++ b/src/util/proxy.ts @@ -210,7 +210,9 @@ export async function runSSHD(params: Record, detached: boolean) { }, "proxy", ); - runSSHD(params, detached); + runSSHD(params, detached).catch((e) => + logger.error("proxy retry error", e, "proxy"), + ); }); return proxyString; diff --git a/src/util/recorder.ts b/src/util/recorder.ts index 0ce2dd7e..06b7b6e1 100644 --- a/src/util/recorder.ts +++ b/src/util/recorder.ts @@ -177,8 +177,8 @@ export class Recorder { this.frameIdToExecId = frameIdToExecId; // Fetch - cdp.on("Fetch.requestPaused", async (params) => { - this.handleRequestPaused(params, cdp); + cdp.on("Fetch.requestPaused", (params) => { + void this.handleRequestPaused(params, cdp); }); await cdp.send("Fetch.enable", { @@ -398,7 +398,9 @@ export class Recorder { return; } - this.serializeToWARC(reqresp); + this.serializeToWARC(reqresp).catch((e) => + logger.warn("Error Serializing to WARC", e, "recorder"), + ); } handleLoadingFailed(params: Protocol.Network.LoadingFailedEvent) { @@ -449,7 +451,7 @@ export class Recorder { recorder: this, networkId: requestId, }); - this.fetcherQ.add(() => fetcher.load()); + void this.fetcherQ.add(() => fetcher.load()); return; } break; @@ -491,7 +493,9 @@ export class Recorder { return; } - this.serializeToWARC(reqresp); + this.serializeToWARC(reqresp).catch((e) => + logger.warn("Error Serializing to WARC", e, "recorder"), + ); } async handleRequestPaused( @@ -780,7 +784,7 @@ export class Recorder { } else { fetcher = new NetworkLoadStreamAsyncFetcher(opts); } - this.fetcherQ.add(() => fetcher.load()); + void this.fetcherQ.add(() => fetcher.load()); } startPage({ pageid, url }: { pageid: string; url: string }) { diff --git a/src/util/screencaster.ts b/src/util/screencaster.ts index 02221e24..80c94057 100644 --- a/src/util/screencaster.ts +++ b/src/util/screencaster.ts @@ -77,7 +77,9 @@ class WSTransport { ); if (this.allWS.size === 1) { - this.caster.startCastAll(); + this.caster + .startCastAll() + .catch((e) => logger.warn("error starting cast", e, "screencast")); } ws.on("close", () => { @@ -85,7 +87,9 @@ class WSTransport { this.allWS.delete(ws); if (this.allWS.size === 0) { - this.caster.stopCastAll(); + this.caster + .stopCastAll() + .catch((e) => logger.warn("error stopping cast", e, "screencast")); } }); } @@ -119,7 +123,7 @@ class RedisPubSubTransport { this.castChannel = `c:${crawlId}:cast`; this.ctrlChannel = `c:${crawlId}:ctrl`; - this.init(redisUrl); + void this.init(redisUrl); } async init(redisUrl: string) { @@ -138,7 +142,7 @@ class RedisPubSubTransport { case "connect": this.numConnections++; if (this.numConnections === 1) { - this.caster.startCastAll(); + await this.caster.startCastAll(); } else { for (const packet of this.caster.iterCachedData()) { await this.sendAll(packet); @@ -149,7 +153,7 @@ class RedisPubSubTransport { case "disconnect": this.numConnections--; if (this.numConnections === 0) { - this.caster.stopCastAll(); + await this.caster.stopCastAll(); } break; } @@ -227,7 +231,7 @@ class ScreenCaster { this.caches.set(id, data); this.urls.set(id, url); - await this.transport.sendAll({ msg, id, data, url }); + this.transport.sendAll({ msg, id, data, url }); } try { @@ -237,7 +241,7 @@ class ScreenCaster { } }); - if (await this.transport.isActive()) { + if (this.transport.isActive()) { await this.startCast(cdp, id); } } @@ -263,7 +267,7 @@ class ScreenCaster { } if (sendClose) { - await this.transport.sendAll({ msg: "close", id }); + this.transport.sendAll({ msg: "close", id }); } this.cdps.delete(id); diff --git a/src/util/sitemapper.ts b/src/util/sitemapper.ts index e34a9bf1..a13eae16 100644 --- a/src/util/sitemapper.ts +++ b/src/util/sitemapper.ts @@ -229,7 +229,7 @@ export class SitemapReader extends EventEmitter { const { body } = resp; if (!body) { - this.closeSitemap(url); + void this.closeSitemap(url); throw new Error("missing response body"); } // decompress .gz sitemaps @@ -251,7 +251,7 @@ export class SitemapReader extends EventEmitter { readableNodeStream.on("error", (e: Error) => { logger.warn("Error parsing sitemap", formatErr(e), "sitemap"); - this.closeSitemap(url); + void this.closeSitemap(url); }); this.initSaxParser(url, readableNodeStream); @@ -449,7 +449,7 @@ export class SitemapReader extends EventEmitter { return; } - this.queue.add(async () => { + void this.queue.add(async () => { try { await this.parseSitemap(url); } catch (e) { diff --git a/src/util/state.ts b/src/util/state.ts index 7710f124..f29f5454 100644 --- a/src/util/state.ts +++ b/src/util/state.ts @@ -429,7 +429,9 @@ return inx; // can happen async w/o slowing down crawling // each page is still checked if in scope before crawling, even while // queue is being filtered - this.filterQueue(regex); + this.filterQueue(regex).catch((e) => + logger.warn("filtering queue error", e, "exclusion"), + ); break; case "removeExclusion": diff --git a/src/util/warcwriter.ts b/src/util/warcwriter.ts index aed643a9..89aca978 100644 --- a/src/util/warcwriter.ts +++ b/src/util/warcwriter.ts @@ -177,7 +177,7 @@ export class WARCWriter implements IndexerOffsetLength { details: LogDetails | null = null, logContext: LogContext = "writer", ) { - this.warcQ.add(async () => { + void this.warcQ.add(async () => { try { await func(); if (details) { @@ -346,7 +346,7 @@ export async function createWARCInfo(filename: string) { const warcVersion = "WARC/1.1"; const type = "warcinfo"; - const record = await WARCRecord.createWARCInfo( + const record = WARCRecord.createWARCInfo( { filename, type, warcVersion }, warcInfo, ); diff --git a/src/util/worker.ts b/src/util/worker.ts index b56ee58e..abcce726 100644 --- a/src/util/worker.ts +++ b/src/util/worker.ts @@ -377,7 +377,7 @@ export class PageWorker { } else { // indicate that the worker has no more work (mostly for screencasting, status, etc...) // depending on other works, will either get more work or crawl will end - this.crawler.workerIdle(this.id); + await this.crawler.workerIdle(this.id); // check if any pending urls const pending = await crawlState.numPending(); diff --git a/tsconfig.eslint.json b/tsconfig.eslint.json new file mode 100644 index 00000000..7da43865 --- /dev/null +++ b/tsconfig.eslint.json @@ -0,0 +1,8 @@ +{ + "compilerOptions": { + "noEmit": true + }, + "extends": "./tsconfig.json", + "include": ["**/*.ts", "**/*.js", ".*.js"], + "exclude": ["dist", "configs", "crawls"] +}