mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
sitemap: only parse sitemap if original seed was added, skip if seed is already in the list/was skipped
This commit is contained in:
parent
5ba6c33bff
commit
5d1a391d5e
1 changed files with 2 additions and 3 deletions
|
@ -758,9 +758,8 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
if (this.limitHit) {
|
if (this.limitHit) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
// parse sitemap if seed is queued, otherwise assume already parsed
|
||||||
|
} else if (seed.sitemap) {
|
||||||
if (seed.sitemap) {
|
|
||||||
await this.parseSitemap(seed.sitemap, i);
|
await this.parseSitemap(seed.sitemap, i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue