Mirror of https://github.com/webrecorder/browsertrix-crawler.git
Synced 2025-10-19 14:33:17 +00:00
Add more exit codes to detect interruption reason (#764)
Fix #584

- Replace interrupted with interruptReason
- Distinct exit codes for different interrupt reasons: SizeLimit (14), TimeLimit (15), FailedLimit (12), and DiskUtilization (16) are used when an interrupt happens for one of these reasons, in addition to the existing BrowserCrashed (10), SignalInterrupted (11), and SignalInterruptedForce (13)
- Docs fix to CLI args

---------

Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
parent 846f0355f6
commit fc56c2cf76

9 changed files with 139 additions and 52 deletions
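The per-reason exit codes let a supervising process tell why a crawl stopped without parsing logs. A minimal consumer sketch (a hypothetical wrapper, not part of this commit; the image name and flags mirror the tests at the end of the diff, and the URL is a placeholder):

```ts
import { spawn } from "node:child_process";

// Exit codes from this commit: 10 BrowserCrashed, 11 SignalInterrupted,
// 12 FailedLimit, 13 SignalInterruptedForce, 14 SizeLimit, 15 TimeLimit,
// 16 DiskUtilization.
const proc = spawn("docker", [
  "run",
  "webrecorder/browsertrix-crawler",
  "crawl",
  "--url",
  "https://example.com/",
]);

proc.on("exit", (code) => {
  if (code === 0) {
    console.log("crawl completed");
  } else if (code === 14 || code === 15) {
    console.log("size or time limit reached; a partial archive is expected");
  } else if (code === 10 || code === 11) {
    console.log("interrupted; restarting may resume the crawl");
  } else {
    console.error(`crawl failed with exit code ${code}`);
  }
});
```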
@@ -243,7 +243,7 @@ Options:
       --maxPageRetries, --retries  If set, number of times to retry a p
                                    age that failed to load before page
                                    is considered to have failed
-                                                     [number] [default: 1]
+                                                     [number] [default: 2]
       --failOnFailedSeed           If set, crawler will fail with exit
                                    code 1 if any seed fails. When combi
                                    ned with --failOnInvalidStatus,will
@@ -48,6 +48,7 @@ import {
   PAGE_OP_TIMEOUT_SECS,
   SITEMAP_INITIAL_FETCH_TIMEOUT_SECS,
   ExitCodes,
+  InterruptReason,
 } from "./util/constants.js";

 import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
@@ -168,8 +169,7 @@ export class Crawler {

   skipTextDocs = 0;

-  interrupted = false;
-  browserCrashed = false;
+  interruptReason: InterruptReason | null = null;
   finalExit = false;
   uploadAndDeleteLocal = false;
   done = false;
@@ -307,7 +307,7 @@ export class Crawler {

     this.healthChecker = null;

-    this.interrupted = false;
+    this.interruptReason = null;
     this.finalExit = false;
     this.uploadAndDeleteLocal = false;

@@ -596,11 +596,28 @@ export class Crawler {
       } else if (stopped) {
         status = "done";
         logger.info("Crawl gracefully stopped on request");
-      } else if (this.interrupted) {
+      } else if (this.interruptReason) {
         status = "interrupted";
-        exitCode = this.browserCrashed
-          ? ExitCodes.BrowserCrashed
-          : ExitCodes.InterruptedGraceful;
+        switch (this.interruptReason) {
+          case InterruptReason.SizeLimit:
+            exitCode = ExitCodes.SizeLimit;
+            break;
+          case InterruptReason.BrowserCrashed:
+            exitCode = ExitCodes.BrowserCrashed;
+            break;
+          case InterruptReason.SignalInterrupted:
+            exitCode = ExitCodes.SignalInterrupted;
+            break;
+          case InterruptReason.DiskUtilization:
+            exitCode = ExitCodes.DiskUtilization;
+            break;
+          case InterruptReason.FailedLimit:
+            exitCode = ExitCodes.FailedLimit;
+            break;
+          case InterruptReason.TimeLimit:
+            exitCode = ExitCodes.TimeLimit;
+            break;
+        }
       }
     }
   } catch (e) {
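The switch above spells out one case per reason. Since both enums are plain numeric values, the same mapping could be written as a lookup table; a hypothetical compaction, not the committed code:

```ts
// Hypothetical alternative to the committed switch: the same
// reason-to-exit-code mapping expressed as a Record lookup.
const EXIT_CODE_FOR_REASON: Record<InterruptReason, ExitCodes> = {
  [InterruptReason.SizeLimit]: ExitCodes.SizeLimit,
  [InterruptReason.TimeLimit]: ExitCodes.TimeLimit,
  [InterruptReason.FailedLimit]: ExitCodes.FailedLimit,
  [InterruptReason.DiskUtilization]: ExitCodes.DiskUtilization,
  [InterruptReason.BrowserCrashed]: ExitCodes.BrowserCrashed,
  [InterruptReason.SignalInterrupted]: ExitCodes.SignalInterrupted,
};

// exitCode = EXIT_CODE_FOR_REASON[this.interruptReason];
```

The explicit switch keeps each pairing visible in review, which may be why the commit prefers it.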
@@ -1378,7 +1395,7 @@ self.__bx_behaviors.selectMainBehavior();
   }

   async checkLimits() {
-    let interrupt = false;
+    let interrupt: InterruptReason | null = null;

     const size = await this.updateCurrSize();

@@ -1387,7 +1404,7 @@ self.__bx_behaviors.selectMainBehavior();
         logger.info(
           `Size threshold reached ${size} >= ${this.params.sizeLimit}, stopping`,
         );
-        interrupt = true;
+        interrupt = InterruptReason.SizeLimit;
       }
     }

@@ -1397,7 +1414,7 @@ self.__bx_behaviors.selectMainBehavior();
         logger.info(
           `Time threshold reached ${elapsed} > ${this.params.timeLimit}, stopping`,
         );
-        interrupt = true;
+        interrupt = InterruptReason.TimeLimit;
       }
     }

@@ -1409,7 +1426,7 @@ self.__bx_behaviors.selectMainBehavior();
         size,
       );
       if (diskUtil.stop === true) {
-        interrupt = true;
+        interrupt = InterruptReason.DiskUtilization;
       }
     }

@@ -1419,18 +1436,21 @@ self.__bx_behaviors.selectMainBehavior();
       if (numFailed >= failedLimit) {
         logger.fatal(
           `Failed threshold reached ${numFailed} >= ${failedLimit}, failing crawl`,
+          {},
+          "general",
+          ExitCodes.FailedLimit,
         );
       }
     }

     if (interrupt) {
       this.uploadAndDeleteLocal = true;
-      this.gracefulFinishOnInterrupt();
+      this.gracefulFinishOnInterrupt(interrupt);
     }
   }

-  gracefulFinishOnInterrupt() {
-    this.interrupted = true;
+  gracefulFinishOnInterrupt(interruptReason: InterruptReason) {
+    this.interruptReason = interruptReason;
     logger.info("Crawler interrupted, gracefully finishing current pages");
     if (!this.params.waitOnDone && !this.params.restartsOnError) {
       this.finalExit = true;
@@ -1457,23 +1477,25 @@ self.__bx_behaviors.selectMainBehavior();
   async serializeAndExit() {
     await this.serializeConfig();

-    if (this.interrupted) {
-      await this.browser.close();
+    if (this.interruptReason) {
       await closeWorkers(0);
+      await this.browser.close();
       await this.closeFiles();

       if (!this.done) {
         await this.setStatusAndExit(
-          ExitCodes.InterruptedImmediate,
+          ExitCodes.SignalInterruptedForce,
           "interrupted",
         );
         return;
       }
     }

     await this.setStatusAndExit(ExitCodes.Success, "done");
   }

   async isCrawlRunning() {
-    if (this.interrupted) {
+    if (this.interruptReason) {
       return false;
     }

@@ -1495,6 +1517,7 @@ self.__bx_behaviors.selectMainBehavior();
       this.healthChecker = new HealthChecker(
         this.params.healthCheckPort,
         this.params.workers,
+        this.browser,
         async () => {
           await this.updateCurrSize();
         },
@@ -1726,7 +1749,7 @@ self.__bx_behaviors.selectMainBehavior();
     if (
       this.params.generateWACZ &&
       !this.params.dryRun &&
-      (!this.interrupted || this.finalExit || this.uploadAndDeleteLocal)
+      (!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
     ) {
       const uploaded = await this.generateWACZ();

@@ -1742,7 +1765,7 @@ self.__bx_behaviors.selectMainBehavior();
       }
     }

-    if (this.params.waitOnDone && (!this.interrupted || this.finalExit)) {
+    if (this.params.waitOnDone && (!this.interruptReason || this.finalExit)) {
       this.done = true;
       logger.info("All done, waiting for signal...");
       await this.crawlState.setStatus("done");
@@ -1753,11 +1776,8 @@ self.__bx_behaviors.selectMainBehavior();
   }

   markBrowserCrashed() {
-    this.interrupted = true;
-    this.browserCrashed = true;
-    if (this.healthChecker) {
-      this.healthChecker.browserCrashed = true;
-    }
+    this.interruptReason = InterruptReason.BrowserCrashed;
+    this.browser.crashed = true;
   }

   async closeLog(): Promise<void> {
@@ -5,7 +5,7 @@ import { setExitOnRedisError } from "./util/redis.js";
 import { Crawler } from "./crawler.js";
 import { ReplayCrawler } from "./replaycrawler.js";
 import fs from "node:fs";
-import { ExitCodes } from "./util/constants.js";
+import { ExitCodes, InterruptReason } from "./util/constants.js";

 let crawler: Crawler | null = null;

@@ -29,9 +29,9 @@ async function handleTerminate(signame: string) {
   try {
     await crawler.checkCanceled();

-    if (!crawler.interrupted) {
-      logger.info("SIGNAL: gracefully finishing current pages...");
-      crawler.gracefulFinishOnInterrupt();
+    if (!crawler.interruptReason) {
+      logger.info("SIGNAL: interrupt request received...");
+      crawler.gracefulFinishOnInterrupt(InterruptReason.SignalInterrupted);
     } else if (forceTerm || Date.now() - lastSigInt > 200) {
       logger.info("SIGNAL: stopping crawl now...");
       await crawler.serializeAndExit();
@@ -9,7 +9,11 @@ import path from "path";
 import { formatErr, LogContext, logger } from "./logger.js";
 import { initStorage } from "./storage.js";

-import { DISPLAY, type ServiceWorkerOpt } from "./constants.js";
+import {
+  DISPLAY,
+  PAGE_OP_TIMEOUT_SECS,
+  type ServiceWorkerOpt,
+} from "./constants.js";

 import puppeteer, {
   Frame,
@@ -20,6 +24,7 @@ import puppeteer, {
 } from "puppeteer-core";
 import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
 import { Recorder } from "./recorder.js";
+import { timedRun } from "./timing.js";

 type BtrixChromeOpts = {
   proxy?: string;
@@ -35,6 +40,7 @@ type LaunchOpts = {
   // TODO: Fix this the next time the file is edited.
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   emulateDevice?: Record<string, any>;

   ondisconnect?: ((err: unknown) => NonNullable<unknown>) | null;

   swOpt?: ServiceWorkerOpt;
@@ -61,6 +67,8 @@ export class Browser {

   swOpt?: ServiceWorkerOpt = "disabled";

+  crashed = false;
+
   constructor() {
     this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
   }
@@ -364,9 +372,24 @@ export class Browser {
   }

   async close() {
-    if (this.browser) {
+    if (!this.browser) {
+      return;
+    }
+
+    if (!this.crashed) {
       this.browser.removeAllListeners("disconnected");
-      await this.browser.close();
+      try {
+        await timedRun(
+          this.browser.close(),
+          PAGE_OP_TIMEOUT_SECS,
+          "Closing Browser Timed Out",
+          {},
+          "browser",
+          true,
+        );
+      } catch (e) {
+        // ignore
+      }
       this.browser = null;
     }
   }
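timedRun comes from ./timing.js and is not shown in this diff; it evidently bounds a promise with a timeout so a wedged browser cannot hang shutdown. A rough sketch of that idea (names and exact behavior are assumptions; the real helper also takes logging arguments):

```ts
// Sketch only: race an operation against a timer, as timedRun is used above.
async function withTimeout<T>(
  promise: Promise<T>,
  seconds: number,
  message: string,
): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), seconds * 1000);
  });
  try {
    return await Promise.race([promise, timeout]);
  } finally {
    clearTimeout(timer); // always clear the timer, success or failure
  }
}
```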
@@ -63,8 +63,21 @@ export enum ExitCodes {
   Failed = 9,
   OutOfSpace = 3,
   BrowserCrashed = 10,
-  InterruptedGraceful = 11,
-  InterruptedImmediate = 13,
+  SignalInterrupted = 11,
+  FailedLimit = 12,
+  SignalInterruptedForce = 13,
+  SizeLimit = 14,
+  TimeLimit = 15,
+  DiskUtilization = 16,
   Fatal = 17,
   ProxyError = 21,
 }
+
+export enum InterruptReason {
+  SizeLimit = 1,
+  TimeLimit = 2,
+  FailedLimit = 3,
+  DiskUtilization = 4,
+  BrowserCrashed = 5,
+  SignalInterrupted = 6,
+}
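Both enums are numeric, so TypeScript's reverse mapping can turn a raw exit code back into a readable name, e.g. for supervisor log lines (illustrative only, not part of this commit):

```ts
// Numeric enums get reverse mappings: ExitCodes[14] === "SizeLimit".
function describeExitCode(code: number): string {
  const name = ExitCodes[code];
  return typeof name === "string" ? `${code} (${name})` : `${code} (unknown)`;
}

describeExitCode(14); // "14 (SizeLimit)"
describeExitCode(16); // "16 (DiskUtilization)"
```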
@@ -1,13 +1,14 @@
 import http from "http";
 import url from "url";
 import { logger } from "./logger.js";
+import { Browser } from "./browser.js";

 // ===========================================================================
 export class HealthChecker {
   port: number;
   errorThreshold: number;
   healthServer: http.Server;
-  browserCrashed = false;
+  browser: Browser;

   updater: (() => Promise<void>) | null;

@@ -16,9 +17,11 @@ export class HealthChecker {
   constructor(
     port: number,
     errorThreshold: number,
+    browser: Browser,
     updater: (() => Promise<void>) | null = null,
   ) {
     this.port = port;
+    this.browser = browser;
     this.errorThreshold = errorThreshold;

     this.healthServer = http.createServer((...args) =>
@@ -34,7 +37,7 @@ export class HealthChecker {
     const pathname = req.url ? url.parse(req.url).pathname : "";
     switch (pathname) {
       case "/healthz":
-        if (this.errorCount < this.errorThreshold && !this.browserCrashed) {
+        if (this.errorCount < this.errorThreshold && !this.browser.crashed) {
           logger.debug(
             `health check ok, num errors ${this.errorCount} < ${this.errorThreshold}`,
             {},
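With the Browser handle injected, /healthz now fails as soon as the browser crashes rather than only after the error threshold is hit. A minimal probe sketch (the port value and the assumption that unhealthy responses are non-2xx are mine, not from this diff):

```ts
// Liveness probe against the crawler's health endpoint.
// Port 6065 is an assumed example; use whatever healthCheckPort is set to.
async function isHealthy(port = 6065): Promise<boolean> {
  try {
    const res = await fetch(`http://localhost:${port}/healthz`);
    return res.ok; // healthy while errorCount < threshold and browser alive
  } catch {
    return false; // health server unreachable
  }
}
```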
@@ -997,7 +997,7 @@ export class Recorder {
     while (
       numPending &&
       !this.pageFinished &&
-      !this.crawler.interrupted &&
+      !this.crawler.interruptReason &&
       !this.crawler.postCrawling
     ) {
       pending = [];
@@ -7,7 +7,6 @@ import { rxEscape } from "./seeds.js";
 import { CDPSession, Page } from "puppeteer-core";
 import { PageState, WorkerId } from "./state.js";
 import { Crawler } from "../crawler.js";
-import { PAGE_OP_TIMEOUT_SECS } from "./constants.js";

 const MAX_REUSE = 5;

@@ -233,8 +232,7 @@ export class PageWorker {
     }

     if (retry >= MAX_REUSE) {
-      this.crawler.browserCrashed = true;
-      this.crawler.interrupted = true;
+      this.crawler.markBrowserCrashed();
       throw new Error("Unable to load new page, browser needs restart");
     }

@@ -433,16 +431,7 @@ export async function runWorkers(

   await closeWorkers();

-  if (!crawler.browserCrashed) {
-    await timedRun(
-      crawler.browser.close(),
-      PAGE_OP_TIMEOUT_SECS,
-      "Closing Browser Timed Out",
-      {},
-      "worker",
-      true,
-    );
-  }
+  await crawler.browser.close();
 }

 // ===========================================================================
@@ -25,7 +25,46 @@ test("ensure crawl fails if failOnFailedLimit is reached", async () => {
       "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached",
     );
   } catch (error) {
-    expect(error.code).toEqual(17);
+    expect(error.code).toEqual(12);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if timeLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --timeLimit 1 --limit 2 --collection failontimelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(15);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if sizeLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --sizeLimit 1 --limit 2 --collection failonsizelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(14);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if diskUtilizationLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --diskUtilization 1 --limit 2 --collection failonsizelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(16);
     passed = false;
   }
   expect(passed).toBe(false);