eslint: add strict await checking (#684)

- require await / void / catch for promises
- don't allow unnecessary await
parent 9cacae6bb6
commit b42548373d
16 changed files with 79 additions and 46 deletions
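
Both new rules are type-aware promise checks. Under @typescript-eslint/no-floating-promises, a promise-returning call must be awaited, explicitly discarded with void, or given a rejection handler; under @typescript-eslint/await-thenable, awaiting a non-thenable value is an error. A minimal sketch of the accepted patterns, using a hypothetical doWork() helper that is not part of this commit:

    // hypothetical helper, for illustration only
    async function doWork(): Promise<void> {}

    async function run() {
      await doWork(); // ok: awaited
      void doWork(); // ok: explicitly fire-and-forget
      doWork().catch((e) => console.warn("doWork failed", e)); // ok: rejection handled

      // doWork();            <- flagged by no-floating-promises
      // await Math.random(); <- flagged by await-thenable (not a promise)
    }

Most fixes in the diff below take the third form, attaching a .catch() that logs a warning, so background work keeps running but rejections are no longer silently dropped.
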
@@ -15,6 +15,8 @@ module.exports = {
   parserOptions: {
     ecmaVersion: 12,
     sourceType: "module",
+    project: ["./tsconfig.eslint.json"],
+    tsconfigRootDir: __dirname,
   },
   rules: {
     "no-constant-condition": ["error", { checkLoops: false }],
@@ -27,6 +29,8 @@ module.exports = {
         allowNamedExports: true,
       },
     ],
+    "@typescript-eslint/no-floating-promises": "error",
+    "@typescript-eslint/await-thenable": "error"
   },
   reportUnusedDisableDirectives: true,
 };

@@ -193,7 +193,7 @@ export class Crawler {
     data: PageState;
     // eslint-disable-next-line no-use-before-define
     crawler: Crawler;
-  }) => NonNullable<unknown>;
+  }) => Promise<void>;
 
   recording: boolean;
 
@@ -770,7 +770,7 @@ self.__bx_behaviors.selectMainBehavior();
   ) {
     await cdp.send("Runtime.enable");
 
-    await cdp.on(
+    cdp.on(
       "Runtime.executionContextCreated",
       (params: Protocol.Runtime.ExecutionContextCreatedEvent) => {
         const { id, auxData } = params.context;
@@ -780,7 +780,7 @@ self.__bx_behaviors.selectMainBehavior();
       },
     );
 
-    await cdp.on(
+    cdp.on(
       "Runtime.executionContextDestroyed",
       (params: Protocol.Runtime.ExecutionContextDestroyedEvent) => {
         const { executionContextId } = params;
@@ -793,7 +793,7 @@ self.__bx_behaviors.selectMainBehavior();
       },
     );
 
-    await cdp.on("Runtime.executionContextsCleared", () => {
+    cdp.on("Runtime.executionContextsCleared", () => {
       frameIdToExecId.clear();
     });
   }
@@ -1877,7 +1877,7 @@ self.__bx_behaviors.selectMainBehavior();
     }
 
     // HTML Pages Only here
-    const frames = await page.frames();
+    const frames = page.frames();
 
     const filteredFrames = await Promise.allSettled(
       frames.map((frame) => this.shouldIncludeFrame(frame, logDetails)),
@@ -2190,7 +2190,7 @@ self.__bx_behaviors.selectMainBehavior();
       } else {
         logger.debug("Text Extraction: None");
       }
-      await fh.write(JSON.stringify(header) + "\n");
+      fh.write(JSON.stringify(header) + "\n");
     }
   } catch (err) {
     logger.error(`"${filename}" creation failed`, err);
@@ -2271,7 +2271,7 @@ self.__bx_behaviors.selectMainBehavior();
     }
 
     try {
-      await pagesFH.write(processedRow);
+      pagesFH.write(processedRow);
     } catch (err) {
       logger.warn(
         "Page append failed",
@@ -2336,7 +2336,9 @@ self.__bx_behaviors.selectMainBehavior();
         { urlsFound: sitemapper.count, limitHit: sitemapper.atLimit() },
         "sitemap",
       );
-      this.crawlState.markSitemapDone();
+      this.crawlState
+        .markSitemapDone()
+        .catch((e) => logger.warn("Error marking sitemap done", e));
       finished = true;
     }
   });
@@ -2354,7 +2356,9 @@ self.__bx_behaviors.selectMainBehavior();
         "sitemap",
       );
     }
-    this.queueInScopeUrls(seedId, [url], 0, 0, true);
+    this.queueInScopeUrls(seedId, [url], 0, 0, true).catch((e) =>
+      logger.warn("Error queuing urls", e, "links"),
+    );
     if (count >= 100 && !resolved) {
       logger.info(
         "Sitemap partially parsed, continue parsing large sitemap in the background",
@@ -2458,7 +2462,7 @@ self.__bx_behaviors.selectMainBehavior();
     }
 
     if (fh) {
-      await fh.end();
+      fh.end();
     }
 
     logger.debug(`Combined WARCs saved as: ${generatedCombinedWarcs}`);

@@ -437,7 +437,7 @@ class InteractiveBrowser {
 
     // attempt to keep everything to initial tab if headless
     if (this.params.headless) {
-      cdp.send("Page.enable");
+      cdp.send("Page.enable").catch((e) => logger.warn("Page.enable error", e));
 
       cdp.on("Page.windowOpen", async (resp) => {
         if (!resp.url) {
@@ -493,7 +493,9 @@ class InteractiveBrowser {
 
   handlePageLoad() {
     this.addOrigin();
-    this.saveCookiesFor(this.page.url());
+    this.saveCookiesFor(this.page.url()).catch((e) =>
+      logger.warn("Error saving cookies", e),
+    );
   }
 
   async saveAllCookies() {
@@ -722,4 +724,4 @@ class InteractiveBrowser {
   }
 }
 
-main();
+await main();

@@ -25,7 +25,7 @@ async function handleTerminate(signame: string) {
   setExitOnRedisError();
 
   try {
-    crawler.checkCanceled();
+    await crawler.checkCanceled();
 
     if (!crawler.interrupted) {
      logger.info("SIGNAL: gracefully finishing current pages...");
@@ -56,4 +56,4 @@ if (process.argv[1].endsWith("qa")) {
   crawler = new Crawler();
 }
 
-crawler.run();
+await crawler.run();

@@ -330,10 +330,10 @@ export class ReplayCrawler extends Crawler {
       page.frames().length > 1
     ) {
       const frame = page.frames()[1];
-      const timeoutid = setTimeout(() => {
+      const timeoutid = setTimeout(async () => {
         logger.warn("Reloading RWP Frame, not inited", { url }, "replay");
         try {
-          frame.evaluate("window.location.reload();");
+          await frame.evaluate("window.location.reload();");
         } catch (e) {
           logger.error("RWP Reload failed", e, "replay");
         }

@@ -104,7 +104,7 @@ export class BlockRules {
         );
       }
     };
-    await browser.interceptRequest(page, onRequest);
+    browser.interceptRequest(page, onRequest);
   }
 
   // eslint-disable-next-line @typescript-eslint/no-explicit-any

@@ -153,7 +153,7 @@ class Logger {
       this.crawlState &&
       toLogToRedis.includes(logLevel)
     ) {
-      this.crawlState.logError(string);
+      this.crawlState.logError(string).catch(() => {});
     }
   }
 
@@ -185,7 +185,10 @@ class Logger {
     this.logAsJSON(`${message}. Quitting`, data, context, "fatal");
 
     if (this.crawlState) {
-      this.crawlState.setStatus("failed").finally(process.exit(exitCode));
+      this.crawlState
+        .setStatus("failed")
+        .catch(() => {})
+        .finally(process.exit(exitCode));
     } else {
       process.exit(exitCode);
     }

@@ -34,7 +34,7 @@ export class OriginOverride {
       }
 
       if (!newUrl || !orig) {
-        request.continue({}, -1);
+        await request.continue({}, -1);
         return;
       }
 
@@ -57,16 +57,16 @@ export class OriginOverride {
           "originOverride",
         );
 
-        request.respond({ body, headers: respHeaders, status }, -1);
+        await request.respond({ body, headers: respHeaders, status }, -1);
       } catch (e) {
         logger.warn(
           "Error overriding origin",
           { ...formatErr(e), url: page.url() },
           "originOverride",
         );
-        request.continue({}, -1);
+        await request.continue({}, -1);
       }
     };
-    await browser.interceptRequest(page, onRequest);
+    browser.interceptRequest(page, onRequest);
   }
 }

@@ -210,7 +210,9 @@ export async function runSSHD(params: Record<string, any>, detached: boolean) {
       },
       "proxy",
     );
-    runSSHD(params, detached);
+    runSSHD(params, detached).catch((e) =>
+      logger.error("proxy retry error", e, "proxy"),
+    );
   });
 
   return proxyString;

@@ -177,8 +177,8 @@ export class Recorder {
     this.frameIdToExecId = frameIdToExecId;
 
     // Fetch
-    cdp.on("Fetch.requestPaused", async (params) => {
-      this.handleRequestPaused(params, cdp);
+    cdp.on("Fetch.requestPaused", (params) => {
+      void this.handleRequestPaused(params, cdp);
     });
 
     await cdp.send("Fetch.enable", {
@@ -398,7 +398,9 @@ export class Recorder {
       return;
     }
 
-    this.serializeToWARC(reqresp);
+    this.serializeToWARC(reqresp).catch((e) =>
+      logger.warn("Error Serializing to WARC", e, "recorder"),
+    );
   }
 
   handleLoadingFailed(params: Protocol.Network.LoadingFailedEvent) {
@@ -449,7 +451,7 @@ export class Recorder {
         recorder: this,
         networkId: requestId,
       });
-      this.fetcherQ.add(() => fetcher.load());
+      void this.fetcherQ.add(() => fetcher.load());
       return;
     }
     break;
@@ -491,7 +493,9 @@ export class Recorder {
       return;
     }
 
-    this.serializeToWARC(reqresp);
+    this.serializeToWARC(reqresp).catch((e) =>
+      logger.warn("Error Serializing to WARC", e, "recorder"),
+    );
   }
 
   async handleRequestPaused(
@@ -780,7 +784,7 @@ export class Recorder {
     } else {
       fetcher = new NetworkLoadStreamAsyncFetcher(opts);
     }
-    this.fetcherQ.add(() => fetcher.load());
+    void this.fetcherQ.add(() => fetcher.load());
   }
 
   startPage({ pageid, url }: { pageid: string; url: string }) {

@@ -77,7 +77,9 @@ class WSTransport {
     );
 
     if (this.allWS.size === 1) {
-      this.caster.startCastAll();
+      this.caster
+        .startCastAll()
+        .catch((e) => logger.warn("error starting cast", e, "screencast"));
     }
 
     ws.on("close", () => {
@@ -85,7 +87,9 @@ class WSTransport {
       this.allWS.delete(ws);
 
       if (this.allWS.size === 0) {
-        this.caster.stopCastAll();
+        this.caster
+          .stopCastAll()
+          .catch((e) => logger.warn("error stopping cast", e, "screencast"));
       }
     });
   }
@@ -119,7 +123,7 @@ class RedisPubSubTransport {
     this.castChannel = `c:${crawlId}:cast`;
     this.ctrlChannel = `c:${crawlId}:ctrl`;
 
-    this.init(redisUrl);
+    void this.init(redisUrl);
   }
 
   async init(redisUrl: string) {
@@ -138,7 +142,7 @@ class RedisPubSubTransport {
         case "connect":
           this.numConnections++;
           if (this.numConnections === 1) {
-            this.caster.startCastAll();
+            await this.caster.startCastAll();
           } else {
             for (const packet of this.caster.iterCachedData()) {
               await this.sendAll(packet);
@@ -149,7 +153,7 @@ class RedisPubSubTransport {
         case "disconnect":
           this.numConnections--;
           if (this.numConnections === 0) {
-            this.caster.stopCastAll();
+            await this.caster.stopCastAll();
           }
           break;
       }
@@ -227,7 +231,7 @@ class ScreenCaster {
       this.caches.set(id, data);
       this.urls.set(id, url);
 
-      await this.transport.sendAll({ msg, id, data, url });
+      this.transport.sendAll({ msg, id, data, url });
     }
 
     try {
@@ -237,7 +241,7 @@ class ScreenCaster {
       }
     });
 
-    if (await this.transport.isActive()) {
+    if (this.transport.isActive()) {
       await this.startCast(cdp, id);
     }
   }
@@ -263,7 +267,7 @@ class ScreenCaster {
     }
 
     if (sendClose) {
-      await this.transport.sendAll({ msg: "close", id });
+      this.transport.sendAll({ msg: "close", id });
     }
 
     this.cdps.delete(id);

@@ -229,7 +229,7 @@ export class SitemapReader extends EventEmitter {
 
     const { body } = resp;
     if (!body) {
-      this.closeSitemap(url);
+      void this.closeSitemap(url);
       throw new Error("missing response body");
     }
     // decompress .gz sitemaps
@@ -251,7 +251,7 @@ export class SitemapReader extends EventEmitter {
 
     readableNodeStream.on("error", (e: Error) => {
       logger.warn("Error parsing sitemap", formatErr(e), "sitemap");
-      this.closeSitemap(url);
+      void this.closeSitemap(url);
     });
 
     this.initSaxParser(url, readableNodeStream);
@@ -449,7 +449,7 @@ export class SitemapReader extends EventEmitter {
       return;
     }
 
-    this.queue.add(async () => {
+    void this.queue.add(async () => {
       try {
         await this.parseSitemap(url);
       } catch (e) {

@@ -429,7 +429,9 @@ return inx;
         // can happen async w/o slowing down crawling
         // each page is still checked if in scope before crawling, even while
         // queue is being filtered
-        this.filterQueue(regex);
+        this.filterQueue(regex).catch((e) =>
+          logger.warn("filtering queue error", e, "exclusion"),
+        );
         break;
 
       case "removeExclusion":

@@ -177,7 +177,7 @@ export class WARCWriter implements IndexerOffsetLength {
     details: LogDetails | null = null,
     logContext: LogContext = "writer",
   ) {
-    this.warcQ.add(async () => {
+    void this.warcQ.add(async () => {
       try {
         await func();
         if (details) {
@@ -346,7 +346,7 @@ export async function createWARCInfo(filename: string) {
   const warcVersion = "WARC/1.1";
   const type = "warcinfo";
 
-  const record = await WARCRecord.createWARCInfo(
+  const record = WARCRecord.createWARCInfo(
     { filename, type, warcVersion },
     warcInfo,
   );

@@ -377,7 +377,7 @@ export class PageWorker {
       } else {
         // indicate that the worker has no more work (mostly for screencasting, status, etc...)
         // depending on other works, will either get more work or crawl will end
-        this.crawler.workerIdle(this.id);
+        await this.crawler.workerIdle(this.id);
 
         // check if any pending urls
         const pending = await crawlState.numPending();

tsconfig.eslint.json (new file, 8 lines)
@@ -0,0 +1,8 @@
+{
+  "compilerOptions": {
+    "noEmit": true
+  },
+  "extends": "./tsconfig.json",
+  "include": ["**/*.ts", "**/*.js", ".*.js"],
+  "exclude": ["dist", "configs", "crawls"]
+}
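
The new tsconfig.eslint.json is what makes the two rules possible: both are type-aware, so typescript-eslint needs a project config covering every linted file (including the plain .js config files, hence the broad include globs) to determine which expressions are actually thenable. As a sketch of the unnecessary-await half of the check, assuming puppeteer-core, whose page.frames() accessor is synchronous:

    // hypothetical illustration, not code from this commit
    import { Page } from "puppeteer-core";

    function frameUrls(page: Page): string[] {
      // page.frames() returns Frame[] directly; `await page.frames()`
      // would be flagged by @typescript-eslint/await-thenable
      const frames = page.frames();
      return frames.map((frame) => frame.url());
    }

This mirrors the `const frames = await page.frames()` to `const frames = page.frames()` change in the Crawler hunk above.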