mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
add collection name validation (#37)
* add collection name validation * linter fix * add tests and optimize * linter fix * move to validateargs * properly reference collection * Update regex and error message Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local>
This commit is contained in:
parent
24e2c4ddf8
commit
c9f8fe051c
2 changed files with 46 additions and 3 deletions
12
crawler.js
12
crawler.js
|
@ -103,7 +103,7 @@ class Crawler {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bootstrap() {
|
||||
let opts = {};
|
||||
if (this.params.logging.includes("pywb")) {
|
||||
|
@ -114,7 +114,7 @@ class Crawler {
|
|||
}
|
||||
|
||||
this.configureUA();
|
||||
|
||||
|
||||
this.headers = {"User-Agent": this.userAgent};
|
||||
|
||||
child_process.spawn("redis-server", {...opts, cwd: "/tmp/"});
|
||||
|
@ -304,7 +304,13 @@ class Crawler {
|
|||
//argv.scope = url.href.slice(0, url.href.lastIndexOf("/") + 1);
|
||||
argv.scope = [new RegExp("^" + this.rxEscape(argv.url.slice(0, argv.url.lastIndexOf("/") + 1)))];
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Check that the collection name is valid.
|
||||
if (argv.collection.search(/^[\w][\w-]*$/) === -1){
|
||||
throw new Error(`\n${argv.collection} is an invalid collection name. Please supply a collection name only using alphanumeric characters and the following characters [_ - ]\n`);
|
||||
}
|
||||
|
||||
argv.timeout *= 1000;
|
||||
|
||||
// waitUntil condition must be: load, domcontentloaded, networkidle0, networkidle2
|
||||
|
|
37
tests/collection_name.test.js
Normal file
37
tests/collection_name.test.js
Normal file
|
@ -0,0 +1,37 @@
|
|||
const util = require("util");
|
||||
const exec = util.promisify(require("child_process").exec);
|
||||
|
||||
// Runs a crawl with a collection name made only of word characters and hyphens
// and expects the crawl to run through instead of being rejected by validation.
test("check that the collection name is properly validation", async () => {
  // If the command exits non-zero, exec rejects and the test fails with the
  // underlying error (including stderr) rather than an opaque boolean mismatch.
  const { stdout } = await exec("docker-compose run crawler crawl --url http://www.example.com/ --collection valid_collection-nameisvalid");

  // This message appearing in stdout is taken as the sign that the crawl ran
  // to its end, i.e. the collection name was accepted.
  expect(stdout).toContain("Waiting 5s to ensure WARCs are finished");
// The timeout is given per test here: jest.setTimeout() called from inside a
// running test does not extend that test's own timeout.
}, 30000);
|
||||
|
||||
|
||||
// Runs a crawl with a collection name containing characters outside
// [A-Za-z0-9_-] and expects the command to fail.
test("check that the collection name is not accepted if it doesn't meets our standards", async () => {
  // exec rejects when the command exits with a non-zero status; any rejection
  // counts as the name having been refused.
  await expect(
    exec("docker-compose run crawler crawl --url http://www.example.com/ --collection invalid_c!!ollection-nameisvalid")
  ).rejects.toThrow();
// The timeout is given per test here: jest.setTimeout() called from inside a
// running test does not extend that test's own timeout.
}, 30000);
|
Loading…
Add table
Add a link
Reference in a new issue