From 10a42d658f07b5ebec0ce875e68fd08dfdb66c46 Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Wed, 28 Feb 2024 15:12:54 -0800 Subject: [PATCH] don't remove 200 status code use 'status' instead of 'statusCode' --- src/crawler.ts | 20 ++++++++++---------- src/util/state.ts | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/crawler.ts b/src/crawler.ts index 9faf6b3e..5bd2925f 100644 --- a/src/crawler.ts +++ b/src/crawler.ts @@ -91,7 +91,7 @@ type PageEntry = { text?: string; favIconUrl?: string; ts?: string; - statusCode?: number; + status?: number; }; // ============================================================================ @@ -720,7 +720,7 @@ self.__bx_behaviors.selectMainBehavior(); if (mime) { data.mime = mime; } - data.statusCode = 200; + data.status = 200; logger.info( "Direct fetch successful", { url, ...logDetails }, @@ -789,7 +789,7 @@ self.__bx_behaviors.selectMainBehavior(); data.loadState = LoadState.EXTRACTION_DONE; - if (data.statusCode >= 400) { + if (data.status >= 400) { return; } @@ -1587,17 +1587,17 @@ self.__bx_behaviors.selectMainBehavior(); } // Handle 4xx or 5xx response as a page load error - const statusCode = resp.status(); - data.statusCode = statusCode; + const status = resp.status(); + data.status = status; if (isChromeError) { if (failCrawlOnError) { logger.fatal("Seed Page Load Error, failing crawl", { - statusCode, + status, ...logDetails, }); } else { logger.error("Page Crashed on Load", { - statusCode, + status, ...logDetails, }); throw new Error("logged"); @@ -1960,7 +1960,7 @@ self.__bx_behaviors.selectMainBehavior(); mime, favicon, ts, - statusCode, + status, }: PageState) { const row: PageEntry = { id: pageid!, url, title, loadState }; @@ -1972,8 +1972,8 @@ self.__bx_behaviors.selectMainBehavior(); row.mime = mime; } - if (statusCode && statusCode !== 200) { - row.statusCode = statusCode; + if (status) { + row.status = status; } if (this.params.writePagesToRedis) { diff --git a/src/util/state.ts b/src/util/state.ts index e6152039..c28fa814 100644 --- a/src/util/state.ts +++ b/src/util/state.ts @@ -46,7 +46,7 @@ export class PageState { depth: number; extraHops: number; - statusCode: number; + status: number; workerid!: WorkerId; @@ -72,7 +72,7 @@ export class PageState { this.seedId = redisData.seedId; this.depth = redisData.depth; this.extraHops = redisData.extraHops || 0; - this.statusCode = 0; + this.status = 0; } }