From e58680dab43a21115a8ea94477c92d96aef1498c Mon Sep 17 00:00:00 2001 From: Ilya Kreymer Date: Thu, 16 Oct 2025 12:40:26 -0700 Subject: [PATCH] profile download improvements: - log when profile download starts - ensure there is a timeout to profile download attempt (60 secs) - attempt retry 2 more times if initial profile download times out - fail crawl after 3 retries, if profile can not be downloaded successfully bump to 1.8.2 --- package.json | 2 +- src/util/storage.ts | 41 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/package.json b/package.json index debd1ec9..a132bcf6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "browsertrix-crawler", - "version": "1.8.1", + "version": "1.8.2", "main": "browsertrix-crawler", "type": "module", "repository": "https://github.com/webrecorder/browsertrix-crawler", diff --git a/src/util/storage.ts b/src/util/storage.ts index 862fa773..11f32799 100644 --- a/src/util/storage.ts +++ b/src/util/storage.ts @@ -15,9 +15,13 @@ import { logger } from "./logger.js"; import getFolderSize from "get-folder-size"; import { WACZ } from "./wacz.js"; +import { sleep, timedRun } from "./timing.js"; +import { DEFAULT_MAX_RETRIES, ExitCodes } from "./constants.js"; const DEFAULT_REGION = "us-east-1"; +const DOWNLOAD_PROFILE_MAX_TIME = 60; + // =========================================================================== export class S3StorageSync { fullPrefix: string; @@ -134,11 +138,38 @@ export class S3StorageSync { } async downloadFile(srcFilename: string, destFilename: string) { - await this.client.fGetObject( - this.bucketName, - this.objectPrefix + srcFilename, - destFilename, - ); + let count = 0; + logger.debug("Downloading profile", { srcFilename }, "storage"); + while (true) { + try { + await timedRun( + this.client.fGetObject( + this.bucketName, + this.objectPrefix + srcFilename, + destFilename, + ), + DOWNLOAD_PROFILE_MAX_TIME, + "Timeout out downloading 
profile", + {}, + "storage", + true, + ); + break; + } catch (e) { + if (count <= DEFAULT_MAX_RETRIES) { + count += 1; + await sleep(5); + logger.warn("Retry downloading profile", {}, "storage"); + } else { + logger.fatal( + "Could not download profile, exiting", + {}, + "storage", + ExitCodes.Failed, + ); + } + } + } } async uploadCollWACZ(