mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Add options to filter logs by --logLevel and --context (#271)
* Add .DS_Store to gitignore * Add --logLevel and --context filtering options * Add log filtering test
This commit is contained in:
parent
746d80adc7
commit
62fe4b4a99
5 changed files with 87 additions and 0 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,3 +5,4 @@ collections/
|
|||
node_modules/
|
||||
crawls/
|
||||
test-crawls/
|
||||
.DS_Store
|
||||
|
|
|
@ -58,6 +58,8 @@ export class Crawler {
|
|||
|
||||
const debugLogging = this.params.logging.includes("debug");
|
||||
logger.setDebugLogging(debugLogging);
|
||||
logger.setLogLevel(this.params.logLevel);
|
||||
logger.setContext(this.params.context);
|
||||
|
||||
logger.debug("Writing log to: " + this.logFilename, {}, "init");
|
||||
|
||||
|
|
47
tests/log_filtering.test.js
Normal file
47
tests/log_filtering.test.js
Normal file
|
@ -0,0 +1,47 @@
|
|||
import child_process from "child_process";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
|
||||
/**
 * Parse newline-delimited JSON text into an array of parsed values.
 *
 * Each line is parsed exactly once. Lines that are not valid JSON
 * (including blank lines, e.g. the one after a trailing newline) are
 * skipped rather than treated as an error.
 *
 * @param {string} string - text containing one JSON document per line
 * @returns {Array} parsed values, in the order their lines appeared
 */
function jsonLinesToArray(string) {
  const parsed = [];

  for (const line of string.split("\n")) {
    try {
      parsed.push(JSON.parse(line));
    } catch (error) {
      // Not a JSON line: skip it deliberately.
    }
  }

  return parsed;
}
|
||||
|
||||
|
||||
// Runs a full crawl in Docker with --logLevel and --context set; the test
// passes as long as the command exits successfully (execSync throws otherwise).
test("ensure crawl run with log options passes", async () => {
  const crawlCommand = "docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url http://specs.webrecorder.net --generateWACZ --collection wr-specs-logs --logging debug,stats --logLevel debug,warn --context general";

  child_process.execSync(crawlCommand);
});
|
||||
|
||||
|
||||
// Reads every crawl-*.log file in the collection's log directory and checks
// that each JSON log line matches the --logLevel and --context filters.
test("check that log files exist and were filtered according to options", () => {
  const logDir = "test-crawls/collections/wr-specs-logs/logs/";

  const logFiles = fs.readdirSync(logDir)
    .filter((name) => name.startsWith("crawl-") && name.endsWith(".log"))
    .map((name) => path.join(logDir, name));

  expect(logFiles.length).toBeGreaterThan(0);

  for (const logFile of logFiles) {
    const entries = jsonLinesToArray(fs.readFileSync(logFile, "utf8"));

    expect(entries.length).toBeGreaterThan(0);

    for (const entry of entries) {
      // Only the two requested levels may appear in the filtered log.
      expect(["debug", "warn"].includes(entry.logLevel)).toBe(true);
      expect(entry.context).toBe("general");
    }
  }
});
|
|
@ -176,6 +176,18 @@ class ArgParser {
|
|||
default: "stats",
|
||||
},
|
||||
|
||||
"logLevel": {
|
||||
describe: "Comma-separated list of log levels to include in logs",
|
||||
type: "string",
|
||||
default: "",
|
||||
},
|
||||
|
||||
"context": {
|
||||
describe: "Comma-separated list of contexts to include in logs",
|
||||
type: "string",
|
||||
default: "",
|
||||
},
|
||||
|
||||
"text": {
|
||||
describe: "If set, extract text to the pages.jsonl file",
|
||||
type: "boolean",
|
||||
|
@ -389,6 +401,8 @@ class ArgParser {
|
|||
|
||||
// log options
|
||||
argv.logging = argv.logging.split(",");
|
||||
argv.logLevel = argv.logLevel ? argv.logLevel.split(",") : [];
|
||||
argv.context = argv.context ? argv.context.split(",") : [];
|
||||
|
||||
// background behaviors to apply
|
||||
const behaviorOpts = {};
|
||||
|
|
|
@ -15,6 +15,8 @@ class Logger
|
|||
constructor() {
|
||||
this.logStream = null;
|
||||
this.debugLogging = null;
|
||||
this.logLevels = [];
|
||||
this.contexts = [];
|
||||
}
|
||||
|
||||
setExternalLogStream(logFH) {
|
||||
|
@ -25,12 +27,33 @@ class Logger
|
|||
this.debugLogging = debugLog;
|
||||
}
|
||||
|
||||
setLogLevel(logLevels) {
|
||||
this.logLevels = logLevels;
|
||||
}
|
||||
|
||||
setContext(contexts) {
|
||||
this.contexts = contexts;
|
||||
}
|
||||
|
||||
logAsJSON(message, data, context, logLevel="info") {
|
||||
if (data instanceof Error) {
|
||||
data = errJSON(data);
|
||||
} else if (typeof data !== "object") {
|
||||
data = {"message": data.toString()};
|
||||
}
|
||||
|
||||
if (this.logLevels.length) {
|
||||
if (this.logLevels.indexOf(logLevel) < 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.contexts.length) {
|
||||
if (this.contexts.indexOf(context) < 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let dataToLog = {
|
||||
"logLevel": logLevel,
|
||||
"timestamp": new Date().toISOString(),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue