diff --git a/crawler.js b/crawler.js
index de7f49a3..096255b0 100644
--- a/crawler.js
+++ b/crawler.js
@@ -103,7 +103,7 @@ class Crawler {
       }
     }
   }
-  
+
   bootstrap() {
     let opts = {};
     if (this.params.logging.includes("pywb")) {
@@ -114,7 +114,7 @@ class Crawler {
     }
 
     this.configureUA();
-    
+
     this.headers = {"User-Agent": this.userAgent};
 
     child_process.spawn("redis-server", {...opts, cwd: "/tmp/"});
@@ -304,7 +304,13 @@ class Crawler {
       //argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
       argv.scope = [new RegExp("^" + this.rxEscape(argv.url.slice(0, argv.url.lastIndexOf("/") + 1)))];
     }
-    
+
+
+    // Check that the collection name is valid.
+    if (argv.collection.search(/^[\w][\w-]*$/) === -1){
+      throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
+    }
+
     argv.timeout *= 1000;
 
     // waitUntil condition must be: load, domcontentloaded, networkidle0, networkidle2
diff --git a/tests/collection_name.test.js b/tests/collection_name.test.js
new file mode 100644
index 00000000..1711e44e
--- /dev/null
+++ b/tests/collection_name.test.js
@@ -0,0 +1,20 @@
+const util = require("util");
+const exec = util.promisify(require("child_process").exec);
+
+// Passed as the third argument to test(): calling jest.setTimeout() inside a test body is too late to extend that test.
+const TEST_TIMEOUT_MS = 30000;
+
+test("check that the collection name is properly validated", async () => {
+  const {stdout} = await exec("docker-compose run crawler crawl --url http://www.example.com/ --collection valid_collection-nameisvalid");
+
+  // This log line is what the test treats as the sign of a successful crawl.
+  expect(stdout).toContain("Waiting 5s to ensure WARCs are finished");
+}, TEST_TIMEOUT_MS);
+
+
+test("check that the collection name is not accepted if it doesn't meet our standards", async () => {
+  // exec() rejects when the command exits with a non-zero status, which is the expected outcome for an invalid name.
+  await expect(
+    exec("docker-compose run crawler crawl --url http://www.example.com/ --collection invalid_c!!ollection-nameisvalid")
+  ).rejects.toThrow();
+}, TEST_TIMEOUT_MS);