browsertrix-crawler/tests/custom-behavior.test.js
Tessa Walsh 2a9b152531
Support loading custom behaviors from URLs and/or filepaths (#707)
Fixes #368 

The `--customBehaviors` flag is now an array, making it repeatable. This
should be backwards compatible with the CLI flag, but may require
changes to YAML configs when custom behaviors are used.

Custom behaviors can be loaded from URLs, local filepaths, and paths to
local directories, including any combination thereof.

New tests are added to ensure loading behaviors from URLs as well as a
mixed combination of URL and filepath works as expected.

---------

Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
2024-11-04 20:30:53 -08:00

87 lines
3.3 KiB
JavaScript

import child_process from "child_process";
test("test custom behaviors from local filepath", async () => {
const res = child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
);
const log = res.toString();
// custom behavior ran for specs.webrecorder.net
expect(
log.indexOf(
'{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}',
) > 0,
).toBe(true);
// but not for example.org
expect(
log.indexOf(
'{"state":{},"msg":"test-stat","page":"https://example.org/","workerid":0}}',
) > 0,
).toBe(false);
expect(
log.indexOf(
'{"state":{"segments":1},"msg":"Skipping autoscroll, page seems to not be responsive to scrolling events","page":"https://example.org/","workerid":0}}',
) > 0,
).toBe(true);
// another custom behavior ran for old.webrecorder.net
expect(
log.indexOf(
'{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
) > 0,
).toBe(true);
});
test("test custom behavior from URL", async () => {
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page");
const log = res.toString();
expect(log.indexOf("Custom behavior file downloaded") > 0).toBe(true);
expect(
log.indexOf(
'{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
) > 0,
).toBe(true);
});
test("test mixed custom behavior sources", async () => {
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page");
const log = res.toString();
// test custom behavior from url ran
expect(log.indexOf("Custom behavior file downloaded") > 0).toBe(true);
expect(
log.indexOf(
'{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}',
) > 0,
).toBe(true);
// test custom behavior from local file ran
expect(
log.indexOf(
'{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
) > 0,
).toBe(true);
});
test("test invalid behavior exit", async () => {
let status = 0;
try {
child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
);
} catch (e) {
status = e.status;
}
// logger fatal exit code
expect(status).toBe(17);
});