diff --git a/.gitignore b/.gitignore
index dfa1820f..a6747aeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ collections/
 node_modules/
 crawls/
 test-crawls/
+.DS_Store
diff --git a/crawler.js b/crawler.js
index 83933fdf..d3a2aa43 100644
--- a/crawler.js
+++ b/crawler.js
@@ -58,6 +58,8 @@ export class Crawler {
     const debugLogging = this.params.logging.includes("debug");
     logger.setDebugLogging(debugLogging);
 
+    logger.setLogLevel(this.params.logLevel);
+    logger.setContext(this.params.context);
 
     logger.debug("Writing log to: " + this.logFilename, {}, "init");
diff --git a/tests/log_filtering.test.js b/tests/log_filtering.test.js
new file mode 100644
index 00000000..fbc8bf41
--- /dev/null
+++ b/tests/log_filtering.test.js
@@ -0,0 +1,47 @@
+import child_process from "child_process";
+import fs from "fs";
+import path from "path";
+
+
+function jsonLinesToArray(string) {
+  return string.split("\n")
+    .filter((line) => {
+      try {
+        JSON.parse(line);
+        return true;
+      } catch (error) {
+        return false;
+      }
+    })
+    .map(line => JSON.parse(line));
+}
+
+
+test("ensure crawl run with log options passes", async () => {
+  child_process.execSync("docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url http://specs.webrecorder.net --generateWACZ --collection wr-specs-logs --logging debug,stats --logLevel debug,warn --context general");
+});
+
+
+test("check that log files exist and were filtered according to options", () => {
+  const logDir = "test-crawls/collections/wr-specs-logs/logs/";
+  const logFiles = [];
+  fs.readdirSync(logDir).forEach(file => {
+    if (file.startsWith("crawl-") && file.endsWith(".log")) {
+      logFiles.push(path.join(logDir, file));
+    }
+  });
+
+  expect(logFiles.length).toBeGreaterThan(0);
+
+  for (let i=0; i < logFiles.length; i++) {
+    const logFile = logFiles[i];
+    const parsedJSONLines = jsonLinesToArray(fs.readFileSync(logFile, "utf8"));
+
+    expect(parsedJSONLines.length).toBeGreaterThan(0);
+
+    parsedJSONLines.forEach((jsonLine) => {
+      expect(jsonLine.logLevel === "debug" || jsonLine.logLevel === "warn").toBe(true);
+      expect(jsonLine.context).toBe("general");
+    });
+  }
+});
diff --git a/util/argParser.js b/util/argParser.js
index 4bc6c1e2..4f41dfc3 100644
--- a/util/argParser.js
+++ b/util/argParser.js
@@ -176,6 +176,18 @@ class ArgParser {
         default: "stats",
       },
 
+      "logLevel": {
+        describe: "Comma-separated list of log levels to include in logs",
+        type: "string",
+        default: "",
+      },
+
+      "context": {
+        describe: "Comma-separated list of contexts to include in logs",
+        type: "string",
+        default: "",
+      },
+
       "text": {
         describe: "If set, extract text to the pages.jsonl file",
         type: "boolean",
@@ -389,6 +401,8 @@ class ArgParser {
     // log options
     argv.logging = argv.logging.split(",");
+    argv.logLevel = argv.logLevel ? argv.logLevel.split(",") : [];
+    argv.context = argv.context ? argv.context.split(",") : [];
 
     // background behaviors to apply
     const behaviorOpts = {};
diff --git a/util/logger.js b/util/logger.js
index b3cdc1b1..170130fa 100644
--- a/util/logger.js
+++ b/util/logger.js
@@ -15,6 +15,8 @@ class Logger
   constructor() {
     this.logStream = null;
     this.debugLogging = null;
+    this.logLevels = [];
+    this.contexts = [];
   }
 
   setExternalLogStream(logFH) {
@@ -25,12 +27,33 @@ class Logger
     this.debugLogging = debugLog;
   }
 
+  setLogLevel(logLevels) {
+    this.logLevels = logLevels;
+  }
+
+  setContext(contexts) {
+    this.contexts = contexts;
+  }
+
   logAsJSON(message, data, context, logLevel="info") {
     if (data instanceof Error) {
       data = errJSON(data);
     } else if (typeof data !== "object") {
       data = {"message": data.toString()};
     }
+
+    if (this.logLevels.length) {
+      if (this.logLevels.indexOf(logLevel) < 0) {
+        return;
+      }
+    }
+
+    if (this.contexts.length) {
+      if (this.contexts.indexOf(context) < 0) {
+        return;
+      }
+    }
+
     let dataToLog = {
       "logLevel": logLevel,
       "timestamp": new Date().toISOString(),