mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
direct fetch dedup: treat 206 and 0 (status unknown) as 200 to avoid duplicate fetches
This commit is contained in:
parent
cc1b52bde9
commit
1fb6c90627
1 changed file with 10 additions and 2 deletions
|
@@ -1361,7 +1361,11 @@ export class Recorder extends EventEmitter {
|
|||
url &&
|
||||
method === "GET" &&
|
||||
!isRedirectStatus(status) &&
|
||||
!(await this.crawlState.addIfNoDupe(WRITE_DUPE_KEY, url, status))
|
||||
!(await this.crawlState.addIfNoDupe(
|
||||
WRITE_DUPE_KEY,
|
||||
url,
|
||||
status === 206 || !status ? 200 : status,
|
||||
))
|
||||
) {
|
||||
logNetwork("Skipping dupe", { url, status, ...this.logDetails });
|
||||
return;
|
||||
|
@@ -1515,7 +1519,11 @@ class AsyncFetcher {
|
|||
if (
|
||||
reqresp.method === "GET" &&
|
||||
url &&
|
||||
!(await crawlState.addIfNoDupe(ASYNC_FETCH_DUPE_KEY, url, status))
|
||||
!(await crawlState.addIfNoDupe(
|
||||
ASYNC_FETCH_DUPE_KEY,
|
||||
url,
|
||||
status === 206 || !status ? 200 : status,
|
||||
))
|
||||
) {
|
||||
if (!this.ignoreDupe) {
|
||||
this.reqresp.asyncLoading = false;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue