Fix using cached WACZ filename if already set ahead of time. (#783)

- if <uid>:nextWacz filename already exists, actually get it and use
that!
- don't merge cdx if not generating wacz yet, use same condition for
both
- bump version to 1.5.8
- fix follow-up to #748, fix #747
This commit is contained in:
Ilya Kreymer 2025-02-28 17:58:56 -08:00 committed by GitHub
parent 2aec2e1a33
commit 9a7ac9bef1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 13 deletions

View file

@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "1.5.7",
"version": "1.5.8",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",

View file

@ -1730,9 +1730,13 @@ self.__bx_behaviors.selectMainBehavior();
await this.combineWARC();
}
const generateFiles =
!this.params.dryRun &&
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal);
if (
(this.params.generateCDX || this.params.generateWACZ) &&
!this.params.dryRun
generateFiles
) {
logger.info("Merging CDX");
await this.crawlState.setStatus(
@ -1746,11 +1750,7 @@ self.__bx_behaviors.selectMainBehavior();
);
}
if (
this.params.generateWACZ &&
!this.params.dryRun &&
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
) {
if (this.params.generateWACZ && generateFiles) {
const uploaded = await this.generateWACZ();
if (uploaded && this.uploadAndDeleteLocal) {

View file

@ -420,12 +420,30 @@ return inx;
/**
 * Chooses the WACZ filename for this instance and registers it in Redis.
 *
 * The name is built from the STORE_FILENAME environment variable (falling
 * back to the template "@ts-@id.wacz") via interpolateFilename(), then
 * offered to the "<key>:nextWacz" hash under this instance's uid using
 * HSETNX, which only writes when the field does not exist yet.
 *
 * NOTE(review): this span is a rendered diff hunk without +/- markers.
 * Going by the hunk header (12 lines before, 30 after), the first hsetnx
 * call and the first `return` are the pre-change lines, and the if/else
 * that follows is their replacement; the two are not meant to coexist in
 * one method body.
 *
 * @returns the filename held in this.waczFilename when the method returns.
 */
async setWACZFilename(): Promise<string> {
const filename = process.env.STORE_FILENAME || "@ts-@id.wacz";
this.waczFilename = interpolateFilename(filename, this.key);
// Pre-change lines: the HSETNX result is discarded, so the freshly
// interpolated name is returned even if the hash already held a different
// name for this uid.
await this.redis.hsetnx(
`${this.key}:nextWacz`,
this.uid,
this.waczFilename,
);
return this.waczFilename;
// Post-change lines: a falsy HSETNX result means the field was already set,
// so the stored name is read back with HGET and replaces the generated one.
if (
!(await this.redis.hsetnx(
`${this.key}:nextWacz`,
this.uid,
this.waczFilename,
))
) {
this.waczFilename = await this.redis.hget(
`${this.key}:nextWacz`,
this.uid,
);
logger.debug(
"Keeping WACZ Filename",
{ filename: this.waczFilename },
"state",
);
} else {
// HSETNX wrote the field, so the newly generated name is the one in use.
logger.debug(
"Using New WACZ Filename",
{ filename: this.waczFilename },
"state",
);
}
// Non-null assertion: presumably needed because hget's result type allows
// null; confirm against the Redis client typings.
return this.waczFilename!;
}
async getWACZFilename(): Promise<string> {