mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Fix using cached WACZ filename if already set ahead of time. (#783)
- if the <uid>:nextWacz filename already exists, actually get it and use that!
- don't merge CDX if not generating WACZ yet; use the same condition for both
- bump version to 1.5.8
- follow-up fix to #748, fixes #747
This commit is contained in:
parent
2aec2e1a33
commit
9a7ac9bef1
3 changed files with 31 additions and 13 deletions
|
@@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "1.5.7",
|
||||
"version": "1.5.8",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
|
@@ -1730,9 +1730,13 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
await this.combineWARC();
|
||||
}
|
||||
|
||||
const generateFiles =
|
||||
!this.params.dryRun &&
|
||||
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal);
|
||||
|
||||
if (
|
||||
(this.params.generateCDX || this.params.generateWACZ) &&
|
||||
!this.params.dryRun
|
||||
generateFiles
|
||||
) {
|
||||
logger.info("Merging CDX");
|
||||
await this.crawlState.setStatus(
|
||||
|
@@ -1746,11 +1750,7 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
);
|
||||
}
|
||||
|
||||
if (
|
||||
this.params.generateWACZ &&
|
||||
!this.params.dryRun &&
|
||||
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
|
||||
) {
|
||||
if (this.params.generateWACZ && generateFiles) {
|
||||
const uploaded = await this.generateWACZ();
|
||||
|
||||
if (uploaded && this.uploadAndDeleteLocal) {
|
||||
|
|
|
@@ -420,12 +420,30 @@ return inx;
|
|||
async setWACZFilename(): Promise<string> {
|
||||
const filename = process.env.STORE_FILENAME || "@ts-@id.wacz";
|
||||
this.waczFilename = interpolateFilename(filename, this.key);
|
||||
await this.redis.hsetnx(
|
||||
`${this.key}:nextWacz`,
|
||||
this.uid,
|
||||
this.waczFilename,
|
||||
);
|
||||
return this.waczFilename;
|
||||
if (
|
||||
!(await this.redis.hsetnx(
|
||||
`${this.key}:nextWacz`,
|
||||
this.uid,
|
||||
this.waczFilename,
|
||||
))
|
||||
) {
|
||||
this.waczFilename = await this.redis.hget(
|
||||
`${this.key}:nextWacz`,
|
||||
this.uid,
|
||||
);
|
||||
logger.debug(
|
||||
"Keeping WACZ Filename",
|
||||
{ filename: this.waczFilename },
|
||||
"state",
|
||||
);
|
||||
} else {
|
||||
logger.debug(
|
||||
"Using New WACZ Filename",
|
||||
{ filename: this.waczFilename },
|
||||
"state",
|
||||
);
|
||||
}
|
||||
return this.waczFilename!;
|
||||
}
|
||||
|
||||
async getWACZFilename(): Promise<string> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue