mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
profile download improvements:
- log when profile download starts
- ensure there is a timeout on each profile download attempt (60 secs)
- attempt retry 2 more times if the initial profile download times out
- fail crawl after 3 retries, if the profile cannot be downloaded successfully
- bump to 1.8.2
This commit is contained in:
parent
6f26148a9b
commit
e58680dab4
2 changed files with 37 additions and 6 deletions
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "1.8.1",
|
||||
"version": "1.8.2",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
|
@ -15,9 +15,13 @@ import { logger } from "./logger.js";
|
|||
import getFolderSize from "get-folder-size";
|
||||
|
||||
import { WACZ } from "./wacz.js";
|
||||
import { sleep, timedRun } from "./timing.js";
|
||||
import { DEFAULT_MAX_RETRIES, ExitCodes } from "./constants.js";
|
||||
|
||||
const DEFAULT_REGION = "us-east-1";
|
||||
|
||||
const DOWNLOAD_PROFILE_MAX_TIME = 60;
|
||||
|
||||
// ===========================================================================
|
||||
export class S3StorageSync {
|
||||
fullPrefix: string;
|
||||
|
@ -134,11 +138,38 @@ export class S3StorageSync {
|
|||
}
|
||||
|
||||
async downloadFile(srcFilename: string, destFilename: string) {
|
||||
await this.client.fGetObject(
|
||||
this.bucketName,
|
||||
this.objectPrefix + srcFilename,
|
||||
destFilename,
|
||||
);
|
||||
let count = 0;
|
||||
logger.debug("Downloading profile", { srcFilename }, "storage");
|
||||
while (true) {
|
||||
try {
|
||||
await timedRun(
|
||||
this.client.fGetObject(
|
||||
this.bucketName,
|
||||
this.objectPrefix + srcFilename,
|
||||
destFilename,
|
||||
),
|
||||
DOWNLOAD_PROFILE_MAX_TIME,
|
||||
"Timeout out downloading profile",
|
||||
{},
|
||||
"storage",
|
||||
true,
|
||||
);
|
||||
break;
|
||||
} catch (e) {
|
||||
if (count <= DEFAULT_MAX_RETRIES) {
|
||||
count += 1;
|
||||
await sleep(5);
|
||||
logger.warn("Retry downloading profile", {}, "storage");
|
||||
} else {
|
||||
logger.fatal(
|
||||
"Could not download profile, exiting",
|
||||
{},
|
||||
"storage",
|
||||
ExitCodes.Failed,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async uploadCollWACZ(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue