mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00

Fixes #804
- Site-specific behaviors use the behaviorScriptCustom log context (via browsertrix-behaviors 0.8.3).
- Add behavior logs to Redis if --logBehaviorsToRedis is set, including non-debug logs from the behavior / behaviorScript contexts and all behaviorScriptCustom logs.
- Noisy logs from built-in behaviors like autoscroll are now logged at debug level as of https://github.com/webrecorder/browsertrix-behaviors/pull/92 and so won't be pushed to Redis for newer versions of the crawler.
- Updates browsertrix-behaviors to 0.8.3 and makes some changes to the log format in tests accordingly.
---------
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
213 lines
8.3 KiB
JavaScript
213 lines
8.3 KiB
JavaScript
import child_process from "child_process";
|
|
import Redis from "ioredis";
|
|
|
|
|
|
/**
 * Pause the calling async flow for a fixed amount of time.
 *
 * @param {number} time - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<void>} Settles (with no value) once the timer fires.
 */
async function sleep(time) {
  await new Promise((done) => {
    setTimeout(done, time);
  });
}
|
|
|
|
|
|
// Crawls three seed pages with the local tests/custom-behaviors/ directory
// mounted into the container, then inspects the crawler's stdout for the
// "behaviorScriptCustom" log lines each custom behavior is expected to emit.
test("test custom behaviors from local filepath", async () => {
  const res = child_process.execSync(
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
  );

  const log = res.toString();

  // custom behavior ran for specs.webrecorder.net
  expect(
    log.includes(
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://specs.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);

  // but not for example.org
  // The page URL keeps its trailing slash, matching the seed passed via --url
  // and every other page URL asserted in this test. Without the slash this
  // negative check could never match and would pass vacuously.
  expect(
    log.includes(
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}',
    ),
  ).toBe(false);

  // another custom behavior ran for old.webrecorder.net
  expect(
    log.includes(
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat-2","details":{"state":{},"behavior":"TestBehavior2","page":"https://old.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);
});
|
|
|
|
// Loads a single custom behavior file from a raw GitHub URL and checks that
// the crawler reports the download and that the behavior logged its output
// for the crawled page.
test("test custom behavior from URL", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page";
  const output = child_process.execSync(crawlCommand).toString();

  // the remote behavior file was fetched
  const downloadReported =
    output.indexOf("Custom behavior file downloaded") > 0;
  expect(downloadReported).toBe(true);

  // and the behavior it defines ran on old.webrecorder.net
  const behaviorLogLine =
    '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat-2","details":{"state":{},"behavior":"TestBehavior2","page":"https://old.webrecorder.net/","workerid":0}}';
  const behaviorRan = output.indexOf(behaviorLogLine) > 0;
  expect(behaviorRan).toBe(true);
});
|
|
|
|
// Combines two --customBehaviors sources in one crawl: custom-2.js fetched
// from a raw GitHub URL and custom.js from the mounted local directory.
test("test mixed custom behavior sources", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page";
  const output = child_process.execSync(crawlCommand).toString();

  // the URL-sourced behavior file was downloaded
  const downloadReported =
    output.indexOf("Custom behavior file downloaded") > 0;
  expect(downloadReported).toBe(true);

  // TestBehavior logged for specs.webrecorder.net
  // (presumably the behavior defined in the local custom.js — the URL-only
  // test in this file shows custom-2.js producing TestBehavior2 instead)
  const specsLogLine =
    '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://specs.webrecorder.net/","workerid":0}}';
  const specsBehaviorRan = output.indexOf(specsLogLine) > 0;
  expect(specsBehaviorRan).toBe(true);

  // TestBehavior2 logged for old.webrecorder.net
  const oldSiteLogLine =
    '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat-2","details":{"state":{},"behavior":"TestBehavior2","page":"https://old.webrecorder.net/","workerid":0}}';
  const oldSiteBehaviorRan = output.indexOf(oldSiteLogLine) > 0;
  expect(oldSiteBehaviorRan).toBe(true);
});
|
|
|
|
// Loads the custom behaviors directory from a git repository reference
// (branch + path given in the query string) and checks which behaviors
// logged output for which of the three seed pages.
test("test custom behaviors from git repo", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page";
  const output = child_process.execSync(crawlCommand).toString();

  // Each entry: [log line to look for, whether it is expected in the output].
  const expectations = [
    // custom behavior ran for specs.webrecorder.net
    [
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://specs.webrecorder.net/","workerid":0}}',
      true,
    ],
    // but not for example.org
    [
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}',
      false,
    ],
    // another custom behavior ran for old.webrecorder.net
    [
      '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat-2","details":{"state":{},"behavior":"TestBehavior2","page":"https://old.webrecorder.net/","workerid":0}}',
      true,
    ],
  ];

  for (const [logLine, shouldAppear] of expectations) {
    expect(output.indexOf(logLine) > 0).toBe(shouldAppear);
  }
});
|
|
|
|
// Runs a crawl with a behavior file from tests/invalid-behaviors/ and expects
// the crawler process to terminate with the logger's fatal exit code.
test("test invalid behavior exit", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page";

  // Stays 0 when the command succeeds; execSync throws on a non-zero exit
  // and the thrown error carries the exit status.
  let exitStatus = 0;
  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitStatus = err.status;
  }

  // logger fatal exit code
  expect(exitStatus).toBe(17);
});
|
|
|
|
// Points --customBehaviors at a URL named "doesntexist" and expects the
// crawler process to terminate with the logger's fatal exit code.
test("test crawl exits if behavior not fetched from url", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page";

  // Stays 0 when the command succeeds; execSync throws on a non-zero exit
  // and the thrown error carries the exit status.
  let exitStatus = 0;
  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitStatus = err.status;
  }

  // logger fatal exit code
  expect(exitStatus).toBe(17);
});
|
|
|
|
// Points --customBehaviors at a git repository named "doesntexist" and
// expects the crawler process to terminate with the logger's fatal exit code.
test("test crawl exits if behavior not fetched from git repo", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page";

  // Stays 0 when the command succeeds; execSync throws on a non-zero exit
  // and the thrown error carries the exit status.
  let exitStatus = 0;
  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitStatus = err.status;
  }

  // logger fatal exit code
  expect(exitStatus).toBe(17);
});
|
|
|
|
// Points --customBehaviors at a local path that is not mounted into the
// container and expects the crawler process to terminate with the logger's
// fatal exit code.
test("test crawl exits if not custom behaviors collected from local path", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page";

  // Stays 0 when the command succeeds; execSync throws on a non-zero exit
  // and the thrown error carries the exit status.
  let exitStatus = 0;
  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    exitStatus = err.status;
  }

  // logger fatal exit code
  expect(exitStatus).toBe(17);
});
|
|
|
|
// Runs a crawl with --logBehaviorsToRedis against a throwaway Redis container
// on a dedicated docker network, then drains the "<CRAWL_ID>:b" list and
// validates every behavior log entry that was pushed to it.
//
// All docker resources and the Redis client are released in `finally` blocks
// so a failed assertion does not leave the "crawl" network or the "redis"
// container behind (which would make the next run fail on
// `docker network create crawl`).
test("test pushing behavior logs to redis", async () => {
  child_process.execSync("docker network create crawl");

  let redisId = null;
  let redis = null;

  try {
    // execSync returns a Buffer ending in a newline; normalize it to a plain
    // container id before it is interpolated into a shell command below.
    redisId = child_process
      .execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis")
      .toString()
      .trim();

    const child = child_process.exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis");

    // Wait for the crawler container to exit before reading from Redis.
    await new Promise((resolve) => child.on("exit", resolve));

    redis = new Redis("redis://127.0.0.1:36399/0", { lazyConnect: true, retryStrategy: () => null });

    await sleep(3000);

    // NOTE(review): ioredis documents connect() as taking an optional
    // callback, so this options object is probably ignored — confirm, and
    // move it to the constructor if the retry limit is actually wanted.
    await redis.connect({ maxRetriesPerRequest: 50 });

    let customLogLineCount = 0;

    // Pop entries until the list is empty (lpop yields a falsy value).
    while (true) {
      const entry = await redis.lpop("behavior-logs-redis-test:b");
      if (!entry) {
        break;
      }

      const json = JSON.parse(entry);
      expect(json).toHaveProperty("timestamp");
      expect(json.logLevel).toBe("info");
      expect(["behavior", "behaviorScript", "behaviorScriptCustom"]).toContain(json.context);

      if (json.context === "behaviorScriptCustom") {
        expect(["test-stat", "test-stat-2", "done!"]).toContain(json.message);
        expect(["TestBehavior", "TestBehavior2"]).toContain(json.details.behavior);
        expect(["https://specs.webrecorder.net/", "https://old.webrecorder.net/"]).toContain(json.details.page);
        customLogLineCount++;
      }
    }

    expect(customLogLineCount).toEqual(4);
  } finally {
    // Close the client socket so the test runner is not kept alive by it.
    if (redis) {
      redis.disconnect();
    }

    try {
      if (redisId) {
        child_process.execSync(`docker kill ${redisId}`);

        // Give docker time to tear the container down before the network
        // it is attached to is removed.
        await sleep(3000);
      }
    } finally {
      child_process.execSync("docker network rm crawl");
    }
  }
});
|