direct fetch dedup: treat 206 and 0 (status unknown) as 200 to avoid duplicate fetches

This commit is contained in:
Ilya Kreymer 2025-05-05 17:43:29 -07:00
parent cc1b52bde9
commit 1fb6c90627

View file

@@ -1361,7 +1361,11 @@ export class Recorder extends EventEmitter {
url &&
method === "GET" &&
!isRedirectStatus(status) &&
- !(await this.crawlState.addIfNoDupe(WRITE_DUPE_KEY, url, status))
+ !(await this.crawlState.addIfNoDupe(
+ WRITE_DUPE_KEY,
+ url,
+ status === 206 || !status ? 200 : status,
+ ))
) {
logNetwork("Skipping dupe", { url, status, ...this.logDetails });
return;
@@ -1515,7 +1519,11 @@ class AsyncFetcher {
if (
reqresp.method === "GET" &&
url &&
- !(await crawlState.addIfNoDupe(ASYNC_FETCH_DUPE_KEY, url, status))
+ !(await crawlState.addIfNoDupe(
+ ASYNC_FETCH_DUPE_KEY,
+ url,
+ status === 206 || !status ? 200 : status,
+ ))
) {
if (!this.ignoreDupe) {
this.reqresp.asyncLoading = false;