mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-12-08 06:09:48 +00:00
Always return the WACZ; store WACZ dependencies only for the current WACZ
Store crawl ID dependencies for the entire crawl
This commit is contained in:
parent
9fba5da0ce
commit
c4f07c4e59
2 changed files with 11 additions and 5 deletions
|
|
@ -1899,12 +1899,14 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
if (this.deduping) {
|
||||
await this.crawlState.setStatus("post-crawl");
|
||||
await this.crawlState.updateDedupSource(wacz);
|
||||
|
||||
await this.crawlState.clearDupeFileRef();
|
||||
}
|
||||
|
||||
await this.crawlState.clearWACZFilename();
|
||||
}
|
||||
|
||||
if (wacz && this.uploadAndDeleteLocal) {
|
||||
if (wacz && this.storage && this.uploadAndDeleteLocal) {
|
||||
await this.crawlState.setArchiveSize(0);
|
||||
|
||||
logger.info(
|
||||
|
|
@ -2033,9 +2035,8 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
const targetFilename = await this.crawlState.getWACZFilename();
|
||||
|
||||
await this.storage.uploadCollWACZ(wacz, targetFilename, isFinished);
|
||||
|
||||
return wacz;
|
||||
}
|
||||
return wacz;
|
||||
} catch (e) {
|
||||
logger.error("Error creating WACZ", e);
|
||||
if (!streaming) {
|
||||
|
|
|
|||
|
|
@ -1341,11 +1341,16 @@ return inx;
|
|||
|
||||
// DEPENDENT CRAWLS FOR DEDUPE
|
||||
async addDupeCrawlRef(crawlId: string, index: string) {
|
||||
await this.redis.sadd(`${this.crawlId}:dindex`, crawlId + " " + index);
|
||||
await this.redis.sadd(`${this.uid}:dindex`, crawlId + " " + index);
|
||||
await this.redis.sadd(`${this.crawlId}:depCrawls`, crawlId);
|
||||
}
|
||||
|
||||
async clearDupeFileRef() {
|
||||
await this.redis.del(`${this.uid}:dindex`);
|
||||
}
|
||||
|
||||
async getDupeDependentSources() {
|
||||
const dependIndexes = await this.redis.smembers(`${this.crawlId}:dindex`);
|
||||
const dependIndexes = await this.redis.smembers(`${this.uid}:dindex`);
|
||||
const crawlIds = [];
|
||||
for (const value of dependIndexes) {
|
||||
const [crawlId, index] = value.split(" ");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue