mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 22:43:17 +00:00
collection name: allow interpolation of ts, crawl id, hostname
This commit is contained in:
parent
dd381899e1
commit
952f93293f
2 changed files with 15 additions and 4 deletions
|
@ -11,6 +11,7 @@ const { hideBin } = require("yargs/helpers");
|
||||||
const { NewWindowPage} = require("./screencaster");
|
const { NewWindowPage} = require("./screencaster");
|
||||||
const { BEHAVIOR_LOG_FUNC, WAIT_UNTIL_OPTS } = require("./constants");
|
const { BEHAVIOR_LOG_FUNC, WAIT_UNTIL_OPTS } = require("./constants");
|
||||||
const { ScopedSeed } = require("./seeds");
|
const { ScopedSeed } = require("./seeds");
|
||||||
|
const { interpolateFilename } = require("./storage");
|
||||||
|
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
@ -114,7 +115,8 @@ class ArgParser {
|
||||||
alias: "c",
|
alias: "c",
|
||||||
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
|
describe: "Collection name to crawl to (replay will be accessible under this name in pywb preview)",
|
||||||
type: "string",
|
type: "string",
|
||||||
default: process.env.CRAWL_ID || `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
|
//default: process.env.CRAWL_ID || `capture-${new Date().toISOString().slice(0,19)}`.replace(/:/g, "-")
|
||||||
|
default: "crawl-@ts",
|
||||||
},
|
},
|
||||||
|
|
||||||
"headless": {
|
"headless": {
|
||||||
|
@ -292,6 +294,8 @@ class ArgParser {
|
||||||
|
|
||||||
|
|
||||||
validateArgs(argv) {
|
validateArgs(argv) {
|
||||||
|
argv.collection = interpolateFilename(argv.collection, argv.crawlId);
|
||||||
|
|
||||||
// Check that the collection name is valid.
|
// Check that the collection name is valid.
|
||||||
if (argv.collection.search(/^[\w][\w-]*$/) === -1){
|
if (argv.collection.search(/^[\w][\w-]*$/) === -1){
|
||||||
throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
|
throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
|
||||||
|
|
|
@ -54,9 +54,7 @@ class S3StorageSync
|
||||||
this.crawlId = crawlId;
|
this.crawlId = crawlId;
|
||||||
this.webhookUrl = webhookUrl;
|
this.webhookUrl = webhookUrl;
|
||||||
|
|
||||||
filename = filename.replace("@ts", new Date().toISOString().replace(/[:TZz.]/g, ""));
|
filename = interpolateFilename(filename, this.crawlId);
|
||||||
filename = filename.replace("@hostname", os.hostname());
|
|
||||||
filename = filename.replace("@id", this.crawlId);
|
|
||||||
|
|
||||||
this.waczFilename = "data/" + filename;
|
this.waczFilename = "data/" + filename;
|
||||||
}
|
}
|
||||||
|
@ -115,7 +113,16 @@ function checksumFile(hashName, path) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Expand the placeholder tokens supported in filenames / collection names.
 *
 * Supported tokens:
 *   - `@ts`       -> current time as an ISO-8601 string with the characters
 *                    `:`, `T`, `Z`, `z` and `.` removed
 *   - `@hostname` -> value of `os.hostname()`
 *   - `@id`       -> the supplied crawl id
 *
 * Every occurrence of each token is replaced (not just the first one), and
 * the substituted values are inserted literally: `$`-sequences such as `$&`
 * inside the hostname or crawl id are NOT treated as replacement patterns.
 *
 * @param {string} filename - template string possibly containing tokens
 * @param {string} crawlId - value substituted for `@id`
 * @returns {string} the template with all tokens expanded
 */
function interpolateFilename(filename, crawlId) {
  // Compute the timestamp once so repeated @ts tokens get the same value.
  const ts = new Date().toISOString().replace(/[:TZz.]/g, "");

  // Function replacers are used so the inserted text is taken verbatim;
  // a plain string replacement would interpret "$&", "$1", "$$", etc.
  // The substitution order (@ts, @hostname, @id) matches the original code.
  filename = filename.replace(/@ts/g, () => ts);
  filename = filename.replace(/@hostname/g, () => os.hostname());
  filename = filename.replace(/@id/g, () => crawlId);
  return filename;
}
|
||||||
|
|
||||||
|
|
||||||
module.exports.S3StorageSync = S3StorageSync;
|
module.exports.S3StorageSync = S3StorageSync;
|
||||||
module.exports.getFileSize = getFileSize;
|
module.exports.getFileSize = getFileSize;
|
||||||
|
module.exports.interpolateFilename = interpolateFilename;
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue