mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Fail crawl with fatal message if custom behavior isn't loaded (#799)
Fixes #797.

The crawler will now exit with a fatal log message and exit code 17 if:
- A Git repository specified with `--customBehaviors` cannot be cloned successfully (new)
- A custom behavior file at a URL specified with `--customBehaviors` is not fetched successfully (new)
- No custom behaviors are collected at a local filepath specified with `--customBehaviors`, or an error is thrown while attempting to collect files from a nonexistent path (new)
- Any collected custom behavior fails `Browser.checkScript` validation (existing behavior)

Tests have also been added accordingly.
This commit is contained in:
parent
e751929a7a
commit
5fedde6eee
2 changed files with 90 additions and 26 deletions
|
@ -72,7 +72,7 @@ async function collectGitBehaviors(gitUrl: string): Promise<FileSources> {
|
|||
);
|
||||
return await collectLocalPathBehaviors(pathToCollect);
|
||||
} catch (e) {
|
||||
logger.error(
|
||||
logger.fatal(
|
||||
"Error downloading custom behaviors from Git repo",
|
||||
{ url: urlStripped, error: e },
|
||||
"behavior",
|
||||
|
@ -96,7 +96,7 @@ async function collectOnlineBehavior(url: string): Promise<FileSources> {
|
|||
);
|
||||
return await collectLocalPathBehaviors(behaviorFilepath);
|
||||
} catch (e) {
|
||||
logger.error(
|
||||
logger.fatal(
|
||||
"Error downloading custom behavior from URL",
|
||||
{ url, error: e },
|
||||
"behavior",
|
||||
|
@ -120,37 +120,56 @@ async function collectLocalPathBehaviors(
|
|||
return [];
|
||||
}
|
||||
|
||||
const stat = await fsp.stat(resolvedPath);
|
||||
|
||||
if (stat.isFile() && ALLOWED_EXTS.includes(path.extname(resolvedPath))) {
|
||||
const contents = await fsp.readFile(resolvedPath);
|
||||
return [
|
||||
{
|
||||
path: resolvedPath,
|
||||
contents: `/* src: ${resolvedPath} */\n\n${contents}`,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const behaviors: FileSources = [];
|
||||
|
||||
const isDir = stat.isDirectory();
|
||||
try {
|
||||
const stat = await fsp.stat(resolvedPath);
|
||||
|
||||
if (!isDir && depth === 0) {
|
||||
logger.warn(
|
||||
"The provided path is not a .js file or directory",
|
||||
{ path: resolvedPath },
|
||||
if (stat.isFile() && ALLOWED_EXTS.includes(path.extname(resolvedPath))) {
|
||||
const contents = await fsp.readFile(resolvedPath);
|
||||
return [
|
||||
{
|
||||
path: resolvedPath,
|
||||
contents: `/* src: ${resolvedPath} */\n\n${contents}`,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const isDir = stat.isDirectory();
|
||||
|
||||
if (!isDir && depth === 0) {
|
||||
logger.warn(
|
||||
"The provided path is not a .js file or directory",
|
||||
{ path: resolvedPath },
|
||||
"behavior",
|
||||
);
|
||||
}
|
||||
|
||||
if (isDir) {
|
||||
const files = await fsp.readdir(resolvedPath);
|
||||
for (const file of files) {
|
||||
const filePath = path.join(resolvedPath, file);
|
||||
const newBehaviors = await collectLocalPathBehaviors(
|
||||
filePath,
|
||||
depth + 1,
|
||||
);
|
||||
behaviors.push(...newBehaviors);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
logger.fatal(
|
||||
"Error fetching local custom behaviors",
|
||||
{ path: resolvedPath, error: e },
|
||||
"behavior",
|
||||
);
|
||||
}
|
||||
|
||||
if (isDir) {
|
||||
const files = await fsp.readdir(resolvedPath);
|
||||
for (const file of files) {
|
||||
const filePath = path.join(resolvedPath, file);
|
||||
const newBehaviors = await collectLocalPathBehaviors(filePath, depth + 1);
|
||||
behaviors.push(...newBehaviors);
|
||||
}
|
||||
if (!behaviors && depth === 0) {
|
||||
logger.fatal(
|
||||
"No custom behaviors found at specified path",
|
||||
{ path: resolvedPath },
|
||||
"behavior",
|
||||
);
|
||||
}
|
||||
|
||||
return behaviors;
|
||||
|
|
|
@ -120,3 +120,48 @@ test("test invalid behavior exit", async () => {
|
|||
// logger fatal exit code
|
||||
expect(status).toBe(17);
|
||||
});
|
||||
|
||||
// A --customBehaviors URL that cannot be fetched should abort the crawl
// through the logger's fatal path rather than letting it continue.
test("test crawl exits if behavior not fetched from url", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page";

  // Remains 0 unless execSync throws; the thrown error's `status` is then
  // recorded as the exit code of the command.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
|
||||
|
||||
// A git+https --customBehaviors source that cannot be cloned should abort the
// crawl through the logger's fatal path rather than letting it continue.
test("test crawl exits if behavior not fetched from git repo", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page";

  // Remains 0 unless execSync throws; the thrown error's `status` is then
  // recorded as the exit code of the command.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
|
||||
|
||||
// A local --customBehaviors path that yields no behaviors (here a path named
// "doesntexist", which is not mounted into the container by this command)
// should abort the crawl through the logger's fatal path.
// Title typo fixed: "if not custom behaviors" -> "if no custom behaviors".
test("test crawl exits if no custom behaviors collected from local path", async () => {
  let status = 0;

  try {
    child_process.execSync(
      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page",
    );
  } catch (e) {
    // execSync throws when the command fails; keep its exit status so it can
    // be asserted below.
    status = e.status;
  }

  // logger fatal exit code
  expect(status).toBe(17);
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue