Fix using cached WACZ filename if already set ahead of time. (#783)

- if a filename for <uid> is already set in the nextWacz hash, actually get
it and use that!
- don't merge CDX if not generating WACZ yet; use the same condition for both
- bump version to 1.5.8
- fix follow-up to #748, fix #747
This commit is contained in:
Ilya Kreymer 2025-02-28 17:58:56 -08:00 committed by GitHub
parent 2aec2e1a33
commit 9a7ac9bef1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 13 deletions

View file

@ -1,6 +1,6 @@
{ {
"name": "browsertrix-crawler", "name": "browsertrix-crawler",
"version": "1.5.7", "version": "1.5.8",
"main": "browsertrix-crawler", "main": "browsertrix-crawler",
"type": "module", "type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler", "repository": "https://github.com/webrecorder/browsertrix-crawler",

View file

@ -1730,9 +1730,13 @@ self.__bx_behaviors.selectMainBehavior();
await this.combineWARC(); await this.combineWARC();
} }
const generateFiles =
!this.params.dryRun &&
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal);
if ( if (
(this.params.generateCDX || this.params.generateWACZ) && (this.params.generateCDX || this.params.generateWACZ) &&
!this.params.dryRun generateFiles
) { ) {
logger.info("Merging CDX"); logger.info("Merging CDX");
await this.crawlState.setStatus( await this.crawlState.setStatus(
@ -1746,11 +1750,7 @@ self.__bx_behaviors.selectMainBehavior();
); );
} }
if ( if (this.params.generateWACZ && generateFiles) {
this.params.generateWACZ &&
!this.params.dryRun &&
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
) {
const uploaded = await this.generateWACZ(); const uploaded = await this.generateWACZ();
if (uploaded && this.uploadAndDeleteLocal) { if (uploaded && this.uploadAndDeleteLocal) {

View file

@ -420,12 +420,30 @@ return inx;
async setWACZFilename(): Promise<string> { async setWACZFilename(): Promise<string> {
const filename = process.env.STORE_FILENAME || "@ts-@id.wacz"; const filename = process.env.STORE_FILENAME || "@ts-@id.wacz";
this.waczFilename = interpolateFilename(filename, this.key); this.waczFilename = interpolateFilename(filename, this.key);
await this.redis.hsetnx( if (
!(await this.redis.hsetnx(
`${this.key}:nextWacz`, `${this.key}:nextWacz`,
this.uid, this.uid,
this.waczFilename, this.waczFilename,
))
) {
this.waczFilename = await this.redis.hget(
`${this.key}:nextWacz`,
this.uid,
); );
return this.waczFilename; logger.debug(
"Keeping WACZ Filename",
{ filename: this.waczFilename },
"state",
);
} else {
logger.debug(
"Using New WACZ Filename",
{ filename: this.waczFilename },
"state",
);
}
return this.waczFilename!;
} }
async getWACZFilename(): Promise<string> { async getWACZFilename(): Promise<string> {