mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 22:43:17 +00:00
collection name: allow interpolation of ts, crawl id, hostname
This commit is contained in:
parent
dd381899e1
commit
952f93293f
2 changed files with 15 additions and 4 deletions
|
@ -11,6 +11,7 @@ const { hideBin } = require("yargs/helpers");
|
|||
const { NewWindowPage} = require("./screencaster");
|
||||
const { BEHAVIOR_LOG_FUNC, WAIT_UNTIL_OPTS } = require("./constants");
|
||||
const { ScopedSeed } = require("./seeds");
|
||||
const { interpolateFilename } = require("./storage");
|
||||
|
||||
|
||||
// ============================================================================
|
||||
|
@ -114,7 +115,8 @@ class ArgParser {
|
|||
alias: "c",
|
||||
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
|
||||
type: "string",
|
||||
default: process.env.CRAWL_ID || `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
|
||||
//default: process.env.CRAWL_ID || `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
|
||||
default: "crawl-@ts",
|
||||
},
|
||||
|
||||
"headless": {
|
||||
|
@ -292,6 +294,8 @@ class ArgParser {
|
|||
|
||||
|
||||
validateArgs(argv) {
|
||||
argv.collection = interpolateFilename(argv.collection, argv.crawlId);
|
||||
|
||||
// Check that the collection name is valid.
|
||||
if (argv.collection.search(/^[\w][\w-]*$/) === -1){
|
||||
throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
|
||||
|
|
|
@ -54,9 +54,7 @@ class S3StorageSync
|
|||
this.crawlId = crawlId;
|
||||
this.webhookUrl = webhookUrl;
|
||||
|
||||
filename = filename.replace("@ts", new Date().toISOString().replace(/[:TZz.]/g, ""));
|
||||
filename = filename.replace("@hostname", os.hostname());
|
||||
filename = filename.replace("@id", this.crawlId);
|
||||
filename = interpolateFilename(filename, this.crawlId);
|
||||
|
||||
this.waczFilename = "data/" + filename;
|
||||
}
|
||||
|
@ -115,7 +113,16 @@ function checksumFile(hashName, path) {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Expand the placeholder tokens in a filename (or collection name) template.
 *
 * Recognized tokens:
 *   - `@ts`       the current time from `Date#toISOString()` with the
 *                 characters `:`, `T`, `Z`, `z` and `.` stripped out
 *                 (the date hyphens are kept)
 *   - `@hostname` the value of `os.hostname()`
 *   - `@id`       the supplied `crawlId`
 *
 * Every occurrence of each token is replaced, and the substituted values are
 * inserted literally.
 *
 * @param {string} filename - Template that may contain the tokens above.
 * @param {string} crawlId - Value substituted for `@id`.
 * @returns {string} The template with all tokens expanded.
 */
function interpolateFilename(filename, crawlId) {
  // Computed once so that repeated "@ts" tokens expand to the same value.
  const timestamp = new Date().toISOString().replace(/[:TZz.]/g, "");
  const hostname = os.hostname();

  // Global regexes replace every occurrence, not just the first one.
  // Function replacers are used because a *string* replacement is scanned for
  // special patterns such as "$&" or "$1", which would corrupt values that
  // happen to contain a "$".
  return filename
    .replace(/@ts/g, () => timestamp)
    .replace(/@hostname/g, () => hostname)
    .replace(/@id/g, () => crawlId);
}
|
||||
|
||||
|
||||
module.exports.S3StorageSync = S3StorageSync;
|
||||
module.exports.getFileSize = getFileSize;
|
||||
module.exports.interpolateFilename = interpolateFilename;
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue