mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Add options to filter logs by --logLevel and --context (#271)
* Add .DS_Store to gitignore * Add --logLevel and --context filtering options * Add log filtering test
This commit is contained in:
parent
746d80adc7
commit
62fe4b4a99
5 changed files with 87 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,3 +5,4 @@ collections/
|
||||||
node_modules/
|
node_modules/
|
||||||
crawls/
|
crawls/
|
||||||
test-crawls/
|
test-crawls/
|
||||||
|
.DS_Store
|
||||||
|
|
|
@ -58,6 +58,8 @@ export class Crawler {
|
||||||
|
|
||||||
const debugLogging = this.params.logging.includes("debug");
|
const debugLogging = this.params.logging.includes("debug");
|
||||||
logger.setDebugLogging(debugLogging);
|
logger.setDebugLogging(debugLogging);
|
||||||
|
logger.setLogLevel(this.params.logLevel);
|
||||||
|
logger.setContext(this.params.context);
|
||||||
|
|
||||||
logger.debug("Writing log to: " + this.logFilename, {}, "init");
|
logger.debug("Writing log to: " + this.logFilename, {}, "init");
|
||||||
|
|
||||||
|
|
47
tests/log_filtering.test.js
Normal file
47
tests/log_filtering.test.js
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
import child_process from "child_process";
|
||||||
|
import fs from "fs";
|
||||||
|
import path from "path";
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Parse newline-delimited JSON text into an array of parsed values.
 *
 * The input is split on "\n" and each piece is handed to JSON.parse.
 * Pieces that are not valid JSON (blank lines, plain-text output, partial
 * writes) are dropped instead of failing the whole parse.
 *
 * @param {string} string - raw contents of a JSON-lines log file
 * @returns {Array} parsed values, in the order they appeared in the input
 */
function jsonLinesToArray(string) {
  const parsedLines = [];

  for (const line of string.split("\n")) {
    try {
      // Parse once and keep the result, rather than validating in one pass
      // and parsing a second time in another.
      parsedLines.push(JSON.parse(line));
    } catch (error) {
      // Not valid JSON: skip this line on purpose.
    }
  }

  return parsedLines;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Runs a crawl in Docker with --logLevel and --context set, writing its
// output under ./test-crawls. execSync throws if the command exits non-zero,
// which fails this test.
test("ensure crawl run with log options passes", async () => {
  const crawlCommand = "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url http://specs.webrecorder.net --generateWACZ --collection wr-specs-logs --logging debug,stats --logLevel debug,warn --context general";

  child_process.execSync(crawlCommand);
});
|
||||||
|
|
||||||
|
|
||||||
|
// Reads every crawl-*.log file in the wr-specs-logs collection and checks
// that each JSON log entry has logLevel "debug" or "warn" and context
// "general", matching the options used for the crawl.
test("check that log files exist and were filtered according to options", () => {
  const logDir = "test-crawls/collections/wr-specs-logs/logs/";

  const logFiles = fs.readdirSync(logDir)
    .filter((name) => name.startsWith("crawl-") && name.endsWith(".log"))
    .map((name) => path.join(logDir, name));

  expect(logFiles.length).toBeGreaterThan(0);

  for (const logFile of logFiles) {
    const entries = jsonLinesToArray(fs.readFileSync(logFile, "utf8"));

    // A log file with no parseable entries would let the checks below pass vacuously.
    expect(entries.length).toBeGreaterThan(0);

    for (const entry of entries) {
      const levelAllowed = entry.logLevel === "debug" || entry.logLevel === "warn";
      expect(levelAllowed).toBe(true);
      expect(entry.context).toBe("general");
    }
  }
});
|
|
@ -176,6 +176,18 @@ class ArgParser {
|
||||||
default: "stats",
|
default: "stats",
|
||||||
},
|
},
|
||||||
|
|
||||||
|
"logLevel": {
|
||||||
|
describe: "Comma-separated list of log levels to include in logs",
|
||||||
|
type: "string",
|
||||||
|
default: "",
|
||||||
|
},
|
||||||
|
|
||||||
|
"context": {
|
||||||
|
describe: "Comma-separated list of contexts to include in logs",
|
||||||
|
type: "string",
|
||||||
|
default: "",
|
||||||
|
},
|
||||||
|
|
||||||
"text": {
|
"text": {
|
||||||
describe: "If set, extract text to the pages.jsonl file",
|
describe: "If set, extract text to the pages.jsonl file",
|
||||||
type: "boolean",
|
type: "boolean",
|
||||||
|
@ -389,6 +401,8 @@ class ArgParser {
|
||||||
|
|
||||||
// log options
|
// log options
|
||||||
argv.logging = argv.logging.split(",");
|
argv.logging = argv.logging.split(",");
|
||||||
|
argv.logLevel = argv.logLevel ? argv.logLevel.split(",") : [];
|
||||||
|
argv.context = argv.context ? argv.context.split(",") : [];
|
||||||
|
|
||||||
// background behaviors to apply
|
// background behaviors to apply
|
||||||
const behaviorOpts = {};
|
const behaviorOpts = {};
|
||||||
|
|
|
@ -15,6 +15,8 @@ class Logger
|
||||||
constructor() {
|
constructor() {
|
||||||
this.logStream = null;
|
this.logStream = null;
|
||||||
this.debugLogging = null;
|
this.debugLogging = null;
|
||||||
|
this.logLevels = [];
|
||||||
|
this.contexts = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
setExternalLogStream(logFH) {
|
setExternalLogStream(logFH) {
|
||||||
|
@ -25,12 +27,33 @@ class Logger
|
||||||
this.debugLogging = debugLog;
|
this.debugLogging = debugLog;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setLogLevel(logLevels) {
|
||||||
|
this.logLevels = logLevels;
|
||||||
|
}
|
||||||
|
|
||||||
|
setContext(contexts) {
|
||||||
|
this.contexts = contexts;
|
||||||
|
}
|
||||||
|
|
||||||
logAsJSON(message, data, context, logLevel="info") {
|
logAsJSON(message, data, context, logLevel="info") {
|
||||||
if (data instanceof Error) {
|
if (data instanceof Error) {
|
||||||
data = errJSON(data);
|
data = errJSON(data);
|
||||||
} else if (typeof data !== "object") {
|
} else if (typeof data !== "object") {
|
||||||
data = {"message": data.toString()};
|
data = {"message": data.toString()};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.logLevels.length) {
|
||||||
|
if (this.logLevels.indexOf(logLevel) < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.contexts.length) {
|
||||||
|
if (this.contexts.indexOf(context) < 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let dataToLog = {
|
let dataToLog = {
|
||||||
"logLevel": logLevel,
|
"logLevel": logLevel,
|
||||||
"timestamp": new Date().toISOString(),
|
"timestamp": new Date().toISOString(),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue