mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
tests: remove example.com from tests (#885)
also use local http-server for behavior tests
This commit is contained in:
parent
a2742df328
commit
8ca7756d1b
7 changed files with 38 additions and 23 deletions
|
@ -8,7 +8,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(...
|
|||
|
||||
test("ensure basic crawl run with docker run passes", async () => {
|
||||
child_process.execSync(
|
||||
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix',
|
||||
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix',
|
||||
);
|
||||
|
||||
child_process.execSync(
|
||||
|
|
|
@ -1,6 +1,21 @@
|
|||
import child_process from "child_process";
|
||||
import Redis from "ioredis";
|
||||
|
||||
let proc = null;
|
||||
|
||||
const DOCKER_HOST_NAME = process.env.DOCKER_HOST_NAME || "host.docker.internal";
|
||||
const TEST_HOST = `http://${DOCKER_HOST_NAME}:31503`;
|
||||
|
||||
beforeAll(() => {
|
||||
proc = child_process.spawn("../../node_modules/.bin/http-server", ["-p", "31503"], {cwd: "tests/custom-behaviors/"});
|
||||
});
|
||||
|
||||
afterAll(() => {
|
||||
if (proc) {
|
||||
proc.kill();
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
async function sleep(time) {
|
||||
await new Promise((resolve) => setTimeout(resolve, time));
|
||||
|
@ -9,7 +24,7 @@ async function sleep(time) {
|
|||
|
||||
test("test custom behaviors from local filepath", async () => {
|
||||
const res = child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/page --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
|
||||
);
|
||||
|
||||
const log = res.toString();
|
||||
|
@ -21,10 +36,10 @@ test("test custom behaviors from local filepath", async () => {
|
|||
) > 0,
|
||||
).toBe(true);
|
||||
|
||||
// but not for example.org
|
||||
// but not for example.com
|
||||
expect(
|
||||
log.indexOf(
|
||||
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org","workerid":0}}',
|
||||
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/page","workerid":0}}',
|
||||
) > 0,
|
||||
).toBe(false);
|
||||
|
||||
|
@ -37,7 +52,7 @@ test("test custom behaviors from local filepath", async () => {
|
|||
});
|
||||
|
||||
test("test custom behavior from URL", async () => {
|
||||
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page");
|
||||
const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --scopeType page`);
|
||||
|
||||
const log = res.toString();
|
||||
|
||||
|
@ -51,7 +66,7 @@ test("test custom behavior from URL", async () => {
|
|||
});
|
||||
|
||||
test("test mixed custom behavior sources", async () => {
|
||||
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page");
|
||||
const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page`);
|
||||
|
||||
const log = res.toString();
|
||||
|
||||
|
@ -74,7 +89,7 @@ test("test mixed custom behavior sources", async () => {
|
|||
|
||||
test("test custom behaviors from git repo", async () => {
|
||||
const res = child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
|
||||
);
|
||||
|
||||
const log = res.toString();
|
||||
|
@ -86,10 +101,10 @@ test("test custom behaviors from git repo", async () => {
|
|||
) > 0,
|
||||
).toBe(true);
|
||||
|
||||
// but not for example.org
|
||||
// but not for example.com
|
||||
expect(
|
||||
log.indexOf(
|
||||
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}',
|
||||
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/","workerid":0}}',
|
||||
) > 0,
|
||||
).toBe(false);
|
||||
|
||||
|
@ -106,7 +121,7 @@ test("test invalid behavior exit", async () => {
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
status = e.status;
|
||||
|
@ -121,7 +136,7 @@ test("test crawl exits if behavior not fetched from url", async () => {
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
status = e.status;
|
||||
|
@ -136,7 +151,7 @@ test("test crawl exits if behavior not fetched from git repo", async () => {
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
status = e.status;
|
||||
|
@ -151,7 +166,7 @@ test("test crawl exits if not custom behaviors collected from local path", async
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors /custom-behaviors/doesntexist --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
status = e.status;
|
||||
|
@ -166,7 +181,7 @@ test("test pushing behavior logs to redis", async () => {
|
|||
|
||||
const redisId = child_process.execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis");
|
||||
|
||||
const child = child_process.exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis");
|
||||
const child = child_process.exec(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis`);
|
||||
|
||||
let resolve = null;
|
||||
const crawlFinished = new Promise(r => resolve = r);
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
},
|
||||
{
|
||||
"type": "change",
|
||||
"value": "https://example.com/",
|
||||
"value": "https://example-com.webrecorder.net/",
|
||||
"selectors": [
|
||||
[
|
||||
"aria/[role=\"main\"]",
|
||||
|
|
|
@ -71,7 +71,7 @@ test("test valid autoclick selector passes validation", async () => {
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector button --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector button --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
failed = true;
|
||||
|
@ -87,7 +87,7 @@ test("test invalid autoclick selector fails validation, crawl fails", async () =
|
|||
|
||||
try {
|
||||
child_process.execSync(
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector \",\" --scopeType page",
|
||||
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector \",\" --scopeType page",
|
||||
);
|
||||
} catch (e) {
|
||||
status = e.status;
|
||||
|
|
|
@ -6,7 +6,7 @@ import { execSync } from "child_process";
|
|||
|
||||
test("ensure exclusion is applied on redirected URL, which contains 'help', so it is not crawled", () => {
|
||||
execSync(
|
||||
"docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --exclude help --collection redir-exclude-test --extraHops 1");
|
||||
"docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --exclude help --collection redir-exclude-test --extraHops 1");
|
||||
|
||||
// no entries besides header
|
||||
expect(
|
||||
|
|
|
@ -10,7 +10,7 @@ export class TestBehavior {
|
|||
}
|
||||
|
||||
static isMatch() {
|
||||
return window.location.origin === "https://example.com";
|
||||
return window.location.origin === "https://example-com.webrecorder.net";
|
||||
}
|
||||
|
||||
async *run(ctx) {
|
||||
|
|
|
@ -38,7 +38,7 @@ afterAll(() => {
|
|||
|
||||
|
||||
test("run crawl with retries for no response", async () => {
|
||||
execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example.com/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`);
|
||||
execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example-com.webrecorder.net/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`);
|
||||
|
||||
const redis = new Redis("redis://127.0.0.1:36387/0", { lazyConnect: true, retryStrategy: () => null });
|
||||
|
||||
|
@ -90,7 +90,7 @@ test("run crawl with retries for 503, enough retries to succeed", async () => {
|
|||
requests = 0;
|
||||
success = false;
|
||||
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
|
||||
let status = 0;
|
||||
|
||||
|
@ -117,7 +117,7 @@ test("run crawl with retries for 503, not enough retries, fail", async () => {
|
|||
requests = 0;
|
||||
success = false;
|
||||
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
|
||||
let status = 0;
|
||||
|
||||
|
@ -143,7 +143,7 @@ test("run crawl with retries for 503, no retries, fail", async () => {
|
|||
requests = 0;
|
||||
success = false;
|
||||
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
|
||||
|
||||
let status = 0;
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue