diff --git a/tests/basic_crawl.test.js b/tests/basic_crawl.test.js index 6fc00af6..940967ab 100644 --- a/tests/basic_crawl.test.js +++ b/tests/basic_crawl.test.js @@ -8,7 +8,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(... test("ensure basic crawl run with docker run passes", async () => { child_process.execSync( - 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix', + 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix', ); child_process.execSync( diff --git a/tests/custom-behavior.test.js b/tests/custom-behavior.test.js index b49ea1d8..2a7f5fc7 100644 --- a/tests/custom-behavior.test.js +++ b/tests/custom-behavior.test.js @@ -1,6 +1,21 @@ import child_process from "child_process"; import Redis from "ioredis"; +let proc = null; + +const DOCKER_HOST_NAME = process.env.DOCKER_HOST_NAME || "host.docker.internal"; +const TEST_HOST = `http://${DOCKER_HOST_NAME}:31503`; + +beforeAll(() => { + proc = child_process.spawn("../../node_modules/.bin/http-server", ["-p", "31503"], {cwd: "tests/custom-behaviors/"}); +}); + +afterAll(() => { + if (proc) { + proc.kill(); + } +}); + async function sleep(time) { await new Promise((resolve) => setTimeout(resolve, time)); @@ -9,7 +24,7 @@ async function sleep(time) { test("test custom behaviors from local filepath", async () => { const res = child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url 
https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/page --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page", ); const log = res.toString(); @@ -21,10 +36,10 @@ test("test custom behaviors from local filepath", async () => { ) > 0, ).toBe(true); - // but not for example.org + // but not for example.com expect( log.indexOf( - '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org","workerid":0}}', + '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/page","workerid":0}}', ) > 0, ).toBe(false); @@ -37,7 +52,7 @@ test("test custom behaviors from local filepath", async () => { }); test("test custom behavior from URL", async () => { - const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page"); + const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --scopeType page`); const log = res.toString(); @@ -51,7 +66,7 @@ test("test custom behavior from URL", async () => { }); test("test mixed custom behavior sources", async () => { - const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl 
--url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page"); + const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page`); const log = res.toString(); @@ -74,7 +89,7 @@ test("test mixed custom behavior sources", async () => { test("test custom behaviors from git repo", async () => { const res = child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page", ); const log = res.toString(); @@ -86,10 +101,10 @@ test("test custom behaviors from git repo", async () => { ) > 0, ).toBe(true); - // but not for example.org + // but not for example.com expect( log.indexOf( - '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}', + 
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/","workerid":0}}', ) > 0, ).toBe(false); @@ -106,7 +121,7 @@ test("test invalid behavior exit", async () => { try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page", + "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page", ); } catch (e) { status = e.status; @@ -121,7 +136,7 @@ test("test crawl exits if behavior not fetched from url", async () => { try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page", ); } catch (e) { status = e.status; @@ -136,7 +151,7 @@ test("test crawl exits if behavior not fetched from git repo", async () => { try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net 
--customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page", ); } catch (e) { status = e.status; @@ -151,7 +166,7 @@ test("test crawl exits if not custom behaviors collected from local path", async try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors /custom-behaviors/doesntexist --scopeType page", ); } catch (e) { status = e.status; @@ -166,7 +181,7 @@ test("test pushing behavior logs to redis", async () => { const redisId = child_process.execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis"); - const child = child_process.exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis"); + const child = child_process.exec(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis`); let resolve = null; const crawlFinished = new Promise(r => resolve = r); diff --git 
a/tests/custom-behaviors/custom-flow.json b/tests/custom-behaviors/custom-flow.json index 9a55b6f5..2b0f2e48 100644 --- a/tests/custom-behaviors/custom-flow.json +++ b/tests/custom-behaviors/custom-flow.json @@ -28,7 +28,7 @@ }, { "type": "change", - "value": "https://example.com/", + "value": "https://example-com.webrecorder.net/", "selectors": [ [ "aria/[role=\"main\"]", diff --git a/tests/custom_selector.test.js b/tests/custom_selector.test.js index c2516d6e..4b180bb0 100644 --- a/tests/custom_selector.test.js +++ b/tests/custom_selector.test.js @@ -71,7 +71,7 @@ test("test valid autoclick selector passes validation", async () => { try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector button --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector button --scopeType page", ); } catch (e) { failed = true; @@ -87,7 +87,7 @@ test("test invalid autoclick selector fails validation, crawl fails", async () = try { child_process.execSync( - "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector \",\" --scopeType page", + "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector \",\" --scopeType page", ); } catch (e) { status = e.status; diff --git a/tests/exclude-redirected.test.js b/tests/exclude-redirected.test.js index aaa9decf..b81a0ef8 100644 --- a/tests/exclude-redirected.test.js +++ b/tests/exclude-redirected.test.js @@ -6,7 +6,7 @@ import { execSync } from "child_process"; test("ensure exclusion is applied on redirected URL, which contains 'help', so it is not crawled", () => { execSync( - "docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --exclude help 
--collection redir-exclude-test --extraHops 1"); + "docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --exclude help --collection redir-exclude-test --extraHops 1"); // no entries besides header expect( diff --git a/tests/invalid-behaviors/invalid-export.js b/tests/invalid-behaviors/invalid-export.js index 061f6ff4..d458b910 100644 --- a/tests/invalid-behaviors/invalid-export.js +++ b/tests/invalid-behaviors/invalid-export.js @@ -10,7 +10,7 @@ export class TestBehavior { } static isMatch() { - return window.location.origin === "https://example.com"; + return window.location.origin === "https://example-com.webrecorder.net"; } async *run(ctx) { diff --git a/tests/retry-failed.test.js b/tests/retry-failed.test.js index a1c21bdf..b914ad34 100644 --- a/tests/retry-failed.test.js +++ b/tests/retry-failed.test.js @@ -38,7 +38,7 @@ afterAll(() => { test("run crawl with retries for no response", async () => { - execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example.com/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`); + execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example-com.webrecorder.net/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`); const redis = new Redis("redis://127.0.0.1:36387/0", { lazyConnect: true, retryStrategy: () => null }); @@ -90,7 +90,7 @@ test("run crawl with retries for 503, enough retries to succeed", async () => { requests = 0; success = false; - const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection 
retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); + const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); let status = 0; @@ -117,7 +117,7 @@ test("run crawl with retries for 503, not enough retries, fail", async () => { requests = 0; success = false; - const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); + const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); let status = 0; @@ -143,7 +143,7 @@ test("run crawl with retries for 503, no retries, fail", async () => { requests = 0; success = false; - const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); + const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); let status = 0;