Add more exit codes to detect interruption reason (#764)

Fix #584

- Replace the interrupted flag with interruptReason
- Use distinct exit codes for the different interrupt reasons: SizeLimit (14), TimeLimit (15), FailedLimit (12) and DiskUtilization (16), in addition to the existing BrowserCrashed (10), SignalInterrupted (11) and SignalInterruptedForce (13) (see the sketch below)
- Doc fix for CLI args (--maxPageRetries default is 2, not 1)
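
For example, a supervising process can branch on these codes to report why a crawl stopped. A minimal sketch in TypeScript (not part of this change): the code-to-reason table follows the ExitCodes enum in the constants diff below, the docker invocation mirrors the tests, and the URL and limits are placeholders.

// Sketch only: map the crawler's exit code to an interruption reason.
import { spawnSync } from "node:child_process";

// Exit codes from the ExitCodes enum (see the constants diff below).
const EXIT_REASONS: Record<number, string> = {
  0: "success",
  10: "browser crashed",
  11: "interrupted by signal (graceful)",
  12: "failed page limit reached",
  13: "interrupted by signal (forced)",
  14: "size limit reached",
  15: "time limit reached",
  16: "disk utilization limit reached",
};

// Run a crawl (placeholder URL/limits) and report the reason, if any.
const { status } = spawnSync("docker", [
  "run", "webrecorder/browsertrix-crawler", "crawl",
  "--url", "https://example.com/", "--timeLimit", "60",
]);
console.log(EXIT_REASONS[status ?? -1] ?? `unknown exit code: ${status}`);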

---------
Co-authored-by: Ilya Kreymer <ikreymer@users.noreply.github.com>
Co-authored-by: Ilya Kreymer <ikreymer@gmail.com>
benoit74 authored on 2025-02-10 23:00:55 +01:00; committed by GitHub
parent 846f0355f6 · commit fc56c2cf76
9 changed files with 139 additions and 52 deletions

@@ -243,7 +243,7 @@ Options:
       --maxPageRetries, --retries  If set, number of times to retry a p
                                    age that failed to load before page
                                    is considered to have failed
-                                                 [number] [default: 1]
+                                                 [number] [default: 2]
       --failOnFailedSeed           If set, crawler will fail with exit
                                    code 1 if any seed fails. When combi
                                    ned with --failOnInvalidStatus,will

@@ -48,6 +48,7 @@ import {
   PAGE_OP_TIMEOUT_SECS,
   SITEMAP_INITIAL_FETCH_TIMEOUT_SECS,
   ExitCodes,
+  InterruptReason,
 } from "./util/constants.js";
 import { AdBlockRules, BlockRuleDecl, BlockRules } from "./util/blockrules.js";
@@ -168,8 +169,7 @@ export class Crawler {
   skipTextDocs = 0;

-  interrupted = false;
-  browserCrashed = false;
+  interruptReason: InterruptReason | null = null;
   finalExit = false;
   uploadAndDeleteLocal = false;
   done = false;
@@ -307,7 +307,7 @@ export class Crawler {
     this.healthChecker = null;
-    this.interrupted = false;
+    this.interruptReason = null;
     this.finalExit = false;
     this.uploadAndDeleteLocal = false;
@@ -596,11 +596,28 @@ export class Crawler {
       } else if (stopped) {
         status = "done";
         logger.info("Crawl gracefully stopped on request");
-      } else if (this.interrupted) {
+      } else if (this.interruptReason) {
         status = "interrupted";
-        exitCode = this.browserCrashed
-          ? ExitCodes.BrowserCrashed
-          : ExitCodes.InterruptedGraceful;
+        switch (this.interruptReason) {
+          case InterruptReason.SizeLimit:
+            exitCode = ExitCodes.SizeLimit;
+            break;
+          case InterruptReason.BrowserCrashed:
+            exitCode = ExitCodes.BrowserCrashed;
+            break;
+          case InterruptReason.SignalInterrupted:
+            exitCode = ExitCodes.SignalInterrupted;
+            break;
+          case InterruptReason.DiskUtilization:
+            exitCode = ExitCodes.DiskUtilization;
+            break;
+          case InterruptReason.FailedLimit:
+            exitCode = ExitCodes.FailedLimit;
+            break;
+          case InterruptReason.TimeLimit:
+            exitCode = ExitCodes.TimeLimit;
+            break;
+        }
       }
     }
   } catch (e) {
@@ -1378,7 +1395,7 @@ self.__bx_behaviors.selectMainBehavior();
   }

   async checkLimits() {
-    let interrupt = false;
+    let interrupt: InterruptReason | null = null;

     const size = await this.updateCurrSize();
@@ -1387,7 +1404,7 @@ self.__bx_behaviors.selectMainBehavior();
        logger.info(
          `Size threshold reached ${size} >= ${this.params.sizeLimit}, stopping`,
        );
-        interrupt = true;
+        interrupt = InterruptReason.SizeLimit;
      }
    }
@@ -1397,7 +1414,7 @@ self.__bx_behaviors.selectMainBehavior();
        logger.info(
          `Time threshold reached ${elapsed} > ${this.params.timeLimit}, stopping`,
        );
-        interrupt = true;
+        interrupt = InterruptReason.TimeLimit;
      }
    }
@@ -1409,7 +1426,7 @@ self.__bx_behaviors.selectMainBehavior();
        size,
      );
      if (diskUtil.stop === true) {
-        interrupt = true;
+        interrupt = InterruptReason.DiskUtilization;
      }
    }
@@ -1419,18 +1436,21 @@ self.__bx_behaviors.selectMainBehavior();
      if (numFailed >= failedLimit) {
        logger.fatal(
          `Failed threshold reached ${numFailed} >= ${failedLimit}, failing crawl`,
+          {},
+          "general",
+          ExitCodes.FailedLimit,
        );
      }
    }

    if (interrupt) {
      this.uploadAndDeleteLocal = true;
-      this.gracefulFinishOnInterrupt();
+      this.gracefulFinishOnInterrupt(interrupt);
    }
  }

-  gracefulFinishOnInterrupt() {
-    this.interrupted = true;
+  gracefulFinishOnInterrupt(interruptReason: InterruptReason) {
+    this.interruptReason = interruptReason;
    logger.info("Crawler interrupted, gracefully finishing current pages");

    if (!this.params.waitOnDone && !this.params.restartsOnError) {
      this.finalExit = true;
@@ -1457,23 +1477,25 @@ self.__bx_behaviors.selectMainBehavior();
  async serializeAndExit() {
    await this.serializeConfig();

-    if (this.interrupted) {
-      await this.browser.close();
+    if (this.interruptReason) {
      await closeWorkers(0);
+      await this.browser.close();
      await this.closeFiles();
      if (!this.done) {
        await this.setStatusAndExit(
-          ExitCodes.InterruptedImmediate,
+          ExitCodes.SignalInterruptedForce,
          "interrupted",
        );
        return;
      }
    }

    await this.setStatusAndExit(ExitCodes.Success, "done");
  }

  async isCrawlRunning() {
-    if (this.interrupted) {
+    if (this.interruptReason) {
      return false;
    }
@@ -1495,6 +1517,7 @@ self.__bx_behaviors.selectMainBehavior();
      this.healthChecker = new HealthChecker(
        this.params.healthCheckPort,
        this.params.workers,
+        this.browser,
        async () => {
          await this.updateCurrSize();
        },
@@ -1726,7 +1749,7 @@ self.__bx_behaviors.selectMainBehavior();
    if (
      this.params.generateWACZ &&
      !this.params.dryRun &&
-      (!this.interrupted || this.finalExit || this.uploadAndDeleteLocal)
+      (!this.interruptReason || this.finalExit || this.uploadAndDeleteLocal)
    ) {
      const uploaded = await this.generateWACZ();
@@ -1742,7 +1765,7 @@ self.__bx_behaviors.selectMainBehavior();
      }
    }

-    if (this.params.waitOnDone && (!this.interrupted || this.finalExit)) {
+    if (this.params.waitOnDone && (!this.interruptReason || this.finalExit)) {
      this.done = true;
      logger.info("All done, waiting for signal...");
      await this.crawlState.setStatus("done");
@@ -1753,11 +1776,8 @@ self.__bx_behaviors.selectMainBehavior();
  }

  markBrowserCrashed() {
-    this.interrupted = true;
-    this.browserCrashed = true;
-    if (this.healthChecker) {
-      this.healthChecker.browserCrashed = true;
-    }
+    this.interruptReason = InterruptReason.BrowserCrashed;
+    this.browser.crashed = true;
  }

  async closeLog(): Promise<void> {

@@ -5,7 +5,7 @@ import { setExitOnRedisError } from "./util/redis.js";
 import { Crawler } from "./crawler.js";
 import { ReplayCrawler } from "./replaycrawler.js";
 import fs from "node:fs";
-import { ExitCodes } from "./util/constants.js";
+import { ExitCodes, InterruptReason } from "./util/constants.js";

 let crawler: Crawler | null = null;
@@ -29,9 +29,9 @@ async function handleTerminate(signame: string) {
  try {
    await crawler.checkCanceled();

-    if (!crawler.interrupted) {
-      logger.info("SIGNAL: gracefully finishing current pages...");
-      crawler.gracefulFinishOnInterrupt();
+    if (!crawler.interruptReason) {
+      logger.info("SIGNAL: interrupt request received...");
+      crawler.gracefulFinishOnInterrupt(InterruptReason.SignalInterrupted);
    } else if (forceTerm || Date.now() - lastSigInt > 200) {
      logger.info("SIGNAL: stopping crawl now...");
      await crawler.serializeAndExit();

@@ -9,7 +9,11 @@ import path from "path";

 import { formatErr, LogContext, logger } from "./logger.js";
 import { initStorage } from "./storage.js";
-import { DISPLAY, type ServiceWorkerOpt } from "./constants.js";
+import {
+  DISPLAY,
+  PAGE_OP_TIMEOUT_SECS,
+  type ServiceWorkerOpt,
+} from "./constants.js";

 import puppeteer, {
   Frame,
@@ -20,6 +24,7 @@ import puppeteer, {
 } from "puppeteer-core";
 import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
 import { Recorder } from "./recorder.js";
+import { timedRun } from "./timing.js";

 type BtrixChromeOpts = {
   proxy?: string;
@@ -35,6 +40,7 @@ type LaunchOpts = {
   // TODO: Fix this the next time the file is edited.
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   emulateDevice?: Record<string, any>;
+
   ondisconnect?: ((err: unknown) => NonNullable<unknown>) | null;

   swOpt?: ServiceWorkerOpt;
@@ -61,6 +67,8 @@ export class Browser {
   swOpt?: ServiceWorkerOpt = "disabled";

+  crashed = false;
+
   constructor() {
     this.profileDir = fs.mkdtempSync(path.join(os.tmpdir(), "profile-"));
   }
@@ -364,9 +372,24 @@ export class Browser {
  }

  async close() {
-    if (this.browser) {
-      this.browser.removeAllListeners("disconnected");
-      await this.browser.close();
-      this.browser = null;
-    }
+    if (!this.browser) {
+      return;
+    }
+
+    if (!this.crashed) {
+      this.browser.removeAllListeners("disconnected");
+      try {
+        await timedRun(
+          this.browser.close(),
+          PAGE_OP_TIMEOUT_SECS,
+          "Closing Browser Timed Out",
+          {},
+          "browser",
+          true,
+        );
+      } catch (e) {
+        // ignore
+      }
+    }
+
+    this.browser = null;
  }

@@ -63,8 +63,21 @@ export enum ExitCodes {
   Failed = 9,
   OutOfSpace = 3,
   BrowserCrashed = 10,
-  InterruptedGraceful = 11,
-  InterruptedImmediate = 13,
+  SignalInterrupted = 11,
+  FailedLimit = 12,
+  SignalInterruptedForce = 13,
+  SizeLimit = 14,
+  TimeLimit = 15,
+  DiskUtilization = 16,
   Fatal = 17,
   ProxyError = 21,
 }
+
+export enum InterruptReason {
+  SizeLimit = 1,
+  TimeLimit = 2,
+  FailedLimit = 3,
+  DiskUtilization = 4,
+  BrowserCrashed = 5,
+  SignalInterrupted = 6,
+}

@@ -1,13 +1,14 @@
 import http from "http";
 import url from "url";
 import { logger } from "./logger.js";
+import { Browser } from "./browser.js";

 // ===========================================================================
 export class HealthChecker {
   port: number;
   errorThreshold: number;
   healthServer: http.Server;
-  browserCrashed = false;
+  browser: Browser;

   updater: (() => Promise<void>) | null;
@@ -16,9 +17,11 @@ export class HealthChecker {
  constructor(
    port: number,
    errorThreshold: number,
+    browser: Browser,
    updater: (() => Promise<void>) | null = null,
  ) {
    this.port = port;
+    this.browser = browser;
    this.errorThreshold = errorThreshold;

    this.healthServer = http.createServer((...args) =>
@@ -34,7 +37,7 @@ export class HealthChecker {
    const pathname = req.url ? url.parse(req.url).pathname : "";
    switch (pathname) {
      case "/healthz":
-        if (this.errorCount < this.errorThreshold && !this.browserCrashed) {
+        if (this.errorCount < this.errorThreshold && !this.browser.crashed) {
          logger.debug(
            `health check ok, num errors ${this.errorCount} < ${this.errorThreshold}`,
            {},

@@ -997,7 +997,7 @@ export class Recorder {
    while (
      numPending &&
      !this.pageFinished &&
-      !this.crawler.interrupted &&
+      !this.crawler.interruptReason &&
      !this.crawler.postCrawling
    ) {
      pending = [];

@@ -7,7 +7,6 @@ import { rxEscape } from "./seeds.js";
 import { CDPSession, Page } from "puppeteer-core";
 import { PageState, WorkerId } from "./state.js";
 import { Crawler } from "../crawler.js";
-import { PAGE_OP_TIMEOUT_SECS } from "./constants.js";

 const MAX_REUSE = 5;
@@ -233,8 +232,7 @@ export class PageWorker {
    }

    if (retry >= MAX_REUSE) {
-      this.crawler.browserCrashed = true;
-      this.crawler.interrupted = true;
+      this.crawler.markBrowserCrashed();
      throw new Error("Unable to load new page, browser needs restart");
    }
@@ -433,16 +431,7 @@ export async function runWorkers(
  await closeWorkers();

-  if (!crawler.browserCrashed) {
-    await timedRun(
-      crawler.browser.close(),
-      PAGE_OP_TIMEOUT_SECS,
-      "Closing Browser Timed Out",
-      {},
-      "worker",
-      true,
-    );
-  }
+  await crawler.browser.close();
 }

 // ===========================================================================

@@ -25,7 +25,46 @@ test("ensure crawl fails if failOnFailedLimit is reached", async () => {
      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://old.webrecorder.net/will404 --url https://specs.webrecorder.net --failOnInvalidStatus --failOnFailedLimit 1 --limit 10 --collection faillimitreached",
    );
  } catch (error) {
-    expect(error.code).toEqual(17);
+    expect(error.code).toEqual(12);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if timeLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --timeLimit 1 --limit 2 --collection failontimelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(15);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if sizeLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --sizeLimit 1 --limit 2 --collection failonsizelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(14);
+    passed = false;
+  }
+  expect(passed).toBe(false);
+});
+
+test("ensure crawl fails if diskUtilizationLimit is reached", async () => {
+  let passed = true;
+  try {
+    await exec(
+      "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://specs.webrecorder.net --failOnInvalidStatus --diskUtilization 1 --limit 2 --collection failonsizelimitreached",
+    );
+  } catch (error) {
+    expect(error.code).toEqual(16);
    passed = false;
  }
  expect(passed).toBe(false);