2023-07-06 16:09:48 -04:00
import child _process from "child_process" ;
2024-11-04 23:30:53 -05:00
// Runs a page-scoped crawl of three URLs with $PWD/tests/custom-behaviors/
// mounted at /custom-behaviors/ and passed as a directory to --customBehaviors,
// then checks the crawler's stdout for the expected behavior log entries.
test("test custom behaviors from local filepath", async () => {
  const res = child_process.execSync(
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/ --scopeType page",
  );

  const log = res.toString();

  // String.prototype.includes is used rather than `indexOf(...) > 0` so that a
  // match at offset 0 still counts as a match.

  // custom behavior ran for specs.webrecorder.net
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);

  // but not for example.org
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat","page":"https://example.org/","workerid":0}}',
    ),
  ).toBe(false);

  // example.org instead logs the autoscroll "skipping" message
  expect(
    log.includes(
      '{"state":{"segments":1},"msg":"Skipping autoscroll, page seems to not be responsive to scrolling events","page":"https://example.org/","workerid":0}}',
    ),
  ).toBe(true);

  // another custom behavior ran for old.webrecorder.net
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);
});
2023-12-13 12:14:53 -08:00
2024-11-04 23:30:53 -05:00
// Crawls old.webrecorder.net with a single behavior file referenced by an
// https:// URL and checks that the file was downloaded and that its log
// entry shows up in the crawler's stdout.
test("test custom behavior from URL", async () => {
  const res = child_process.execSync(
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --scopeType page",
  );

  const log = res.toString();

  // `includes` (not `indexOf(...) > 0`) so a match at offset 0 is not missed.

  // the behavior file was fetched from the URL
  expect(log.includes("Custom behavior file downloaded")).toBe(true);

  // and the downloaded behavior ran for old.webrecorder.net
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);
});
// Crawls two pages with two --customBehaviors sources at once: custom-2.js
// referenced by URL and custom.js from the mounted /custom-behaviors/
// directory. Checks stdout for the download notice and both behavior messages.
test("test mixed custom behavior sources", async () => {
  const res = child_process.execSync(
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/custom-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://old.webrecorder.net/ --customBehaviors https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/refs/heads/main/tests/custom-behaviors/custom-2.js --customBehaviors /custom-behaviors/custom.js --scopeType page",
  );

  const log = res.toString();

  // `includes` (not `indexOf(...) > 0`) so a match at offset 0 is not missed.

  // the URL-sourced behavior file was downloaded
  expect(log.includes("Custom behavior file downloaded")).toBe(true);

  // "test-stat" on specs.webrecorder.net — presumably logged by the locally
  // mounted custom.js, the only other behavior passed; confirm against that file
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);

  // "test-stat-2" on old.webrecorder.net — the same message the URL-only test
  // expects from custom-2.js, i.e. the behavior loaded from the URL here
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);
});
2024-11-14 01:50:33 -05:00
// Crawls three pages with --customBehaviors pointing at a git+https:// spec
// (branch=main, path=tests/custom-behaviors) and checks the crawler's stdout
// for the same log entries as the local-directory test.
test("test custom behaviors from git repo", async () => {
  // The git spec is wrapped in escaped double quotes inside the command; it
  // contains `?` and `&`, which the shell would otherwise interpret.
  const res = child_process.execSync(
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors \"git+https://github.com/webrecorder/browsertrix-crawler.git?branch=main&path=tests/custom-behaviors\" --scopeType page",
  );

  const log = res.toString();

  // `includes` (not `indexOf(...) > 0`) so a match at offset 0 is not missed.

  // custom behavior ran for specs.webrecorder.net
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat","page":"https://specs.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);

  // but not for example.org
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat","page":"https://example.org/","workerid":0}}',
    ),
  ).toBe(false);

  // example.org instead logs the autoscroll "skipping" message
  expect(
    log.includes(
      '{"state":{"segments":1},"msg":"Skipping autoscroll, page seems to not be responsive to scrolling events","page":"https://example.org/","workerid":0}}',
    ),
  ).toBe(true);

  // another custom behavior ran for old.webrecorder.net
  expect(
    log.includes(
      '{"state":{},"msg":"test-stat-2","page":"https://old.webrecorder.net/","workerid":0}}',
    ),
  ).toBe(true);
});
2023-12-13 12:14:53 -08:00
// Runs a crawl whose only custom behavior is invalid-export.js (from the
// mounted tests/invalid-behaviors/ directory) and expects the crawler process
// to terminate with exit status 17.
test("test invalid behavior exit", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls -v $PWD/tests/invalid-behaviors/:/custom-behaviors/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --url https://example.org/ --url https://old.webrecorder.net/ --customBehaviors /custom-behaviors/invalid-export.js --scopeType page";

  // Stays 0 if the command unexpectedly succeeds, which fails the check below.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    // execSync throws on a non-zero exit; the error carries the exit status.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
2025-03-31 20:35:30 -04:00
// Points --customBehaviors at an https:// URL under a "doesntexist" path and
// expects the crawler process to terminate with exit status 17.
test("test crawl exits if behavior not fetched from url", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors https://webrecorder.net/doesntexist/custombehavior.js --scopeType page";

  // Stays 0 if the command unexpectedly succeeds, which fails the check below.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    // execSync throws on a non-zero exit; the error carries the exit status.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
// Points --customBehaviors at a git+https:// spec for a "doesntexist"
// repository and expects the crawler process to terminate with exit status 17.
test("test crawl exits if behavior not fetched from git repo", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors git+https://github.com/webrecorder/doesntexist --scopeType page";

  // Stays 0 if the command unexpectedly succeeds, which fails the check below.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    // execSync throws on a non-zero exit; the error carries the exit status.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});
// Passes /custom-behaviors/doesntexist to --customBehaviors (this command
// mounts no /custom-behaviors/ volume) and expects the crawler process to
// terminate with exit status 17.
test("test crawl exits if not custom behaviors collected from local path", async () => {
  const crawlCommand =
    "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://example.com --customBehaviors /custom-behaviors/doesntexist --scopeType page";

  // Stays 0 if the command unexpectedly succeeds, which fails the check below.
  let exitCode = 0;

  try {
    child_process.execSync(crawlCommand);
  } catch (err) {
    // execSync throws on a non-zero exit; the error carries the exit status.
    exitCode = err.status;
  }

  // logger fatal exit code
  expect(exitCode).toBe(17);
});