mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Fix using cached WACZ filename if already set ahead of time. (#783)
- if <uid>:nextWacz filename already exists, actually get it and use that!
- don't merge CDX if not generating WACZ yet; use the same condition for both
- bump version to 1.5.8
- follow-up fix to #748, fixes #747
This commit is contained in:
parent
2aec2e1a33
commit
9a7ac9bef1
3 changed files with 31 additions and 13 deletions
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "browsertrix-crawler",
|
"name": "browsertrix-crawler",
|
||||||
"version": "1.5.7",
|
"version": "1.5.8",
|
||||||
"main": "browsertrix-crawler",
|
"main": "browsertrix-crawler",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||||
|
|
|
@ -1730,9 +1730,13 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
await this.combineWARC();
|
await this.combineWARC();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const generateFiles =
|
||||||
|
!this.params.dryRun &&
|
||||||
|
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
(this.params.generateCDX || this.params.generateWACZ) &&
|
(this.params.generateCDX || this.params.generateWACZ) &&
|
||||||
!this.params.dryRun
|
generateFiles
|
||||||
) {
|
) {
|
||||||
logger.info("Merging CDX");
|
logger.info("Merging CDX");
|
||||||
await this.crawlState.setStatus(
|
await this.crawlState.setStatus(
|
||||||
|
@ -1746,11 +1750,7 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (
|
if (this.params.generateWACZ && generateFiles) {
|
||||||
this.params.generateWACZ &&
|
|
||||||
!this.params.dryRun &&
|
|
||||||
(!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
|
|
||||||
) {
|
|
||||||
const uploaded = await this.generateWACZ();
|
const uploaded = await this.generateWACZ();
|
||||||
|
|
||||||
if (uploaded && this.uploadAndDeleteLocal) {
|
if (uploaded && this.uploadAndDeleteLocal) {
|
||||||
|
|
|
@ -420,12 +420,30 @@ return inx;
|
||||||
async setWACZFilename(): Promise<string> {
|
async setWACZFilename(): Promise<string> {
|
||||||
const filename = process.env.STORE_FILENAME || "@ts-@id.wacz";
|
const filename = process.env.STORE_FILENAME || "@ts-@id.wacz";
|
||||||
this.waczFilename = interpolateFilename(filename, this.key);
|
this.waczFilename = interpolateFilename(filename, this.key);
|
||||||
await this.redis.hsetnx(
|
if (
|
||||||
`${this.key}:nextWacz`,
|
!(await this.redis.hsetnx(
|
||||||
this.uid,
|
`${this.key}:nextWacz`,
|
||||||
this.waczFilename,
|
this.uid,
|
||||||
);
|
this.waczFilename,
|
||||||
return this.waczFilename;
|
))
|
||||||
|
) {
|
||||||
|
this.waczFilename = await this.redis.hget(
|
||||||
|
`${this.key}:nextWacz`,
|
||||||
|
this.uid,
|
||||||
|
);
|
||||||
|
logger.debug(
|
||||||
|
"Keeping WACZ Filename",
|
||||||
|
{ filename: this.waczFilename },
|
||||||
|
"state",
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
logger.debug(
|
||||||
|
"Using New WACZ Filename",
|
||||||
|
{ filename: this.waczFilename },
|
||||||
|
"state",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return this.waczFilename!;
|
||||||
}
|
}
|
||||||
|
|
||||||
async getWACZFilename(): Promise<string> {
|
async getWACZFilename(): Promise<string> {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue