Fail crawl with fatal message if custom behavior isn't loaded (#799)

Fixes #797 

The crawler will now exit with a fatal log message and exit code 17 if:

- A Git repository specified with `--customBehaviors` cannot be cloned
successfully (new)
- A custom behavior file at a URL specified with `--customBehaviors` is
not fetched successfully (new)
- No custom behaviors are collected at a local filepath specified with
`--customBehaviors`, or if an error is thrown while attempting to collect
files from a nonexistent path (new)
- Any custom behaviors collected fail `Browser.checkScript` validation
(existing behavior)

Tests have also been added accordingly.
This commit is contained in:
Tessa Walsh 2025-03-31 20:35:30 -04:00 committed by GitHub
parent e751929a7a
commit 5fedde6eee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 90 additions and 26 deletions

View file

@ -120,3 +120,48 @@ test("test invalid behavior exit", async () => {
// logger fatal exit code
expect(status).toBe(17);
});
// Runs a crawl whose --customBehaviors value is a URL and asserts that the
// crawler process terminates with the fatal exit code.
test("test crawl exits if behavior not fetched from url", async () => {
  const command =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page";

  // Stays 0 when the command does not throw, which makes the assertion
  // below fail.
  let exitCode = 0;

  try {
    child_process.execSync(command);
  } catch (err) {
    // The thrown error's `status` field is taken as the process exit code.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
// Runs a crawl whose --customBehaviors value is a git+https repository
// reference and asserts that the crawler process terminates with the fatal
// exit code.
test("test crawl exits if behavior not fetched from git repo", async () => {
  const command =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page";

  // Stays 0 when the command does not throw, which makes the assertion
  // below fail.
  let exitCode = 0;

  try {
    child_process.execSync(command);
  } catch (err) {
    // The thrown error's `status` field is taken as the process exit code.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
// Runs a crawl whose --customBehaviors value is a local filesystem path and
// asserts that the crawler process terminates with the fatal exit code.
// The test title reads "no custom behaviors" (previously the typo "not custom
// behaviors") so the reported test name is grammatical.
test("test crawl exits if no custom behaviors collected from local path", async () => {
  // Default of 0 means a run that does not throw fails the assertion below.
  let status = 0;
  try {
    child_process.execSync(
      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page",
    );
  } catch (e) {
    // The thrown error's `status` field is taken as the process exit code.
    status = e.status;
  }
  // logger fatal exit code
  expect(status).toBe(17);
});