tests: remove example.com from tests (#885)

also use local http-server for behavior tests
This commit is contained in:
Ilya Kreymer 2025-09-19 23:21:47 -07:00 committed by GitHub
parent a2742df328
commit 8ca7756d1b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 38 additions and 23 deletions

View file

@ -8,7 +8,7 @@ const testIf = (condition, ...args) => condition ? test(...args) : test.skip(...
test("ensure basic crawl run with docker run passes", async () => { test("ensure basic crawl run with docker run passes", async () => {
child_process.execSync( child_process.execSync(
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix', 'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --generateWACZ --text --collection wr-net --combineWARC --rolloverSize 10000 --workers 2 --title "test title" --description "test description" --warcPrefix custom-prefix',
); );
child_process.execSync( child_process.execSync(

View file

@ -1,6 +1,21 @@
import child_process from "child_process"; import child_process from "child_process";
import Redis from "ioredis"; import Redis from "ioredis";
// Hostname under which crawler containers can reach the machine running the
// tests; overridable through the DOCKER_HOST_NAME environment variable.
const DOCKER_HOST_NAME = process.env.DOCKER_HOST_NAME || "host.docker.internal";

// Base URL of the local static file server started below (port 31503).
const TEST_HOST = `http://${DOCKER_HOST_NAME}:31503`;

// Handle of the spawned http-server process; stays null until beforeAll runs.
let proc = null;

// Start http-server on port 31503 with tests/custom-behaviors/ as its working
// directory, so behavior scripts such as custom-2.js can be requested via
// TEST_HOST instead of a remote URL.
beforeAll(() => {
  proc = child_process.spawn(
    "../../node_modules/.bin/http-server",
    ["-p", "31503"],
    { cwd: "tests/custom-behaviors/" },
  );
});

// Stop the server once the tests in this file are done; nothing to do if it
// was never started.
afterAll(() => {
  if (!proc) {
    return;
  }
  proc.kill();
});
async function sleep(time) { async function sleep(time) {
await new Promise((resolve) => setTimeout(resolve, time)); await new Promise((resolve) => setTimeout(resolve, time));
@ -9,7 +24,7 @@ async function sleep(time) {
test("test custom behaviors from local filepath", async () => { test("test custom behaviors from local filepath", async () => {
const res = child_process.execSync( const res = child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page", "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/page --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
); );
const log = res.toString(); const log = res.toString();
@ -21,10 +36,10 @@ test("test custom behaviors from local filepath", async () => {
) > 0, ) > 0,
).toBe(true); ).toBe(true);
// but not for example.org // but not for example-com.webrecorder.net
expect( expect(
log.indexOf( log.indexOf(
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org","workerid":0}}', '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/page","workerid":0}}',
) > 0, ) > 0,
).toBe(false); ).toBe(false);
@ -37,7 +52,7 @@ test("test custom behaviors from local filepath", async () => {
}); });
test("test custom behavior from URL", async () => { test("test custom behavior from URL", async () => {
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page"); const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --scopeType page`);
const log = res.toString(); const log = res.toString();
@ -51,7 +66,7 @@ test("test custom behavior from URL", async () => {
}); });
test("test mixed custom behavior sources", async () => { test("test mixed custom behavior sources", async () => {
const res = child_process.execSync("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page"); const res = child_process.execSync(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page`);
const log = res.toString(); const log = res.toString();
@ -74,7 +89,7 @@ test("test mixed custom behavior sources", async () => {
test("test custom behaviors from git repo", async () => { test("test custom behaviors from git repo", async () => {
const res = child_process.execSync( const res = child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
); );
const log = res.toString(); const log = res.toString();
@ -86,10 +101,10 @@ test("test custom behaviors from git repo", async () => {
) > 0, ) > 0,
).toBe(true); ).toBe(true);
// but not for example.org // but not for example-com.webrecorder.net
expect( expect(
log.indexOf( log.indexOf(
'"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example.org/","workerid":0}}', '"logLevel":"info","context":"behaviorScriptCustom","message":"test-stat","details":{"state":{},"behavior":"TestBehavior","page":"https://example-com.webrecorder.net/","workerid":0}}',
) > 0, ) > 0,
).toBe(false); ).toBe(false);
@ -106,7 +121,7 @@ test("test invalid behavior exit", async () => {
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page", "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net.webrecorder.net/ --url https://example-com.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page",
); );
} catch (e) { } catch (e) {
status = e.status; status = e.status;
@ -121,7 +136,7 @@ test("test crawl exits if behavior not fetched from url", async () => {
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page",
); );
} catch (e) { } catch (e) {
status = e.status; status = e.status;
@ -136,7 +151,7 @@ test("test crawl exits if behavior not fetched from git repo", async () => {
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page",
); );
} catch (e) { } catch (e) {
status = e.status; status = e.status;
@ -151,7 +166,7 @@ test("test crawl exits if not custom behaviors collected from local path", async
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net --customBehaviors /custom-behaviors/doesntexist --scopeType page",
); );
} catch (e) { } catch (e) {
status = e.status; status = e.status;
@ -166,7 +181,7 @@ test("test pushing behavior logs to redis", async () => {
const redisId = child_process.execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis"); const redisId = child_process.execSync("docker run --rm --network=crawl -p 36399:6379 --name redis -d redis");
const child = child_process.exec("docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis"); const child = child_process.exec(`docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ -e CRAWL_ID=behavior-logs-redis-test --network=crawl --rm webrecorder/browsertrix-crawler crawl --debugAccessRedis --redisStoreUrl redis://redis:6379 --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors ${TEST_HOST}/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page --logBehaviorsToRedis`);
let resolve = null; let resolve = null;
const crawlFinished = new Promise(r => resolve = r); const crawlFinished = new Promise(r => resolve = r);

View file

@ -28,7 +28,7 @@
}, },
{ {
"type": "change", "type": "change",
"value": "https://example.com/", "value": "https://example-com.webrecorder.net/",
"selectors": [ "selectors": [
[ [
"aria/[role=\"main\"]", "aria/[role=\"main\"]",

View file

@ -71,7 +71,7 @@ test("test valid autoclick selector passes validation", async () => {
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector button --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector button --scopeType page",
); );
} catch (e) { } catch (e) {
failed = true; failed = true;
@ -87,7 +87,7 @@ test("test invalid autoclick selector fails validation, crawl fails", async () =
try { try {
child_process.execSync( child_process.execSync(
"docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --clickSelector \",\" --scopeType page", "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --clickSelector \",\" --scopeType page",
); );
} catch (e) { } catch (e) {
status = e.status; status = e.status;

View file

@ -6,7 +6,7 @@ import { execSync } from "child_process";
test("ensure exclusion is applied on redirected URL, which contains 'help', so it is not crawled", () => { test("ensure exclusion is applied on redirected URL, which contains 'help', so it is not crawled", () => {
execSync( execSync(
"docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com/ --exclude help --collection redir-exclude-test --extraHops 1"); "docker run -p 9037:9037 -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example-com.webrecorder.net/ --exclude help --collection redir-exclude-test --extraHops 1");
// no entries besides header // no entries besides header
expect( expect(

View file

@ -10,7 +10,7 @@ export class TestBehavior {
} }
static isMatch() { static isMatch() {
return window.location.origin === "https://example.com"; return window.location.origin === "https://example-com.webrecorder.net";
} }
async *run(ctx) { async *run(ctx) {

View file

@ -38,7 +38,7 @@ afterAll(() => {
test("run crawl with retries for no response", async () => { test("run crawl with retries for no response", async () => {
execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example.com/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`); execSync(`docker run -d -v $PWD/test-crawls:/crawls -e CRAWL_ID=test -p 36387:6379 --rm webrecorder/browsertrix-crawler crawl --url http://invalid-host-x:31501 --url https://example-com.webrecorder.net/ --limit 2 --pageExtraDelay 10 --debugAccessRedis --collection retry-fail --retries 5`);
const redis = new Redis("redis://127.0.0.1:36387/0", { lazyConnect: true, retryStrategy: () => null }); const redis = new Redis("redis://127.0.0.1:36387/0", { lazyConnect: true, retryStrategy: () => null });
@ -90,7 +90,7 @@ test("run crawl with retries for 503, enough retries to succeed", async () => {
requests = 0; requests = 0;
success = false; success = false;
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-2 --retries 2 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
let status = 0; let status = 0;
@ -117,7 +117,7 @@ test("run crawl with retries for 503, not enough retries, fail", async () => {
requests = 0; requests = 0;
success = false; success = false;
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-3 --retries 1 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
let status = 0; let status = 0;
@ -143,7 +143,7 @@ test("run crawl with retries for 503, no retries, fail", async () => {
requests = 0; requests = 0;
success = false; success = false;
const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example.com/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`); const child = exec(`docker run -v $PWD/test-crawls:/crawls --rm webrecorder/browsertrix-crawler crawl --url http://${DOCKER_HOST_NAME}:31501 --url https://example-com.webrecorder.net/ --limit 2 --collection retry-fail-4 --retries 0 --failOnInvalidStatus --failOnFailedSeed --logging stats,debug`);
let status = 0; let status = 0;