Add Prettier to the repo, and format all the files! (#428)

This adds Prettier to the repo and sets up the pre-commit hook to
auto-format as well as lint.
Also updates the ignore files to exclude crawls, test-crawls, scratch, and dist as needed.
This commit is contained in:
Emma Segal-Grossman 2023-11-09 19:11:11 -05:00 committed by GitHub
parent af1e0860e4
commit 2a49406df7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
70 changed files with 3192 additions and 2026 deletions

View file

@ -15,81 +15,99 @@ import { Browser } from "./util/browser.js";
import { initStorage } from "./util/storage.js";
import { CDPSession, Page, PuppeteerLifeCycleEvent } from "puppeteer-core";
const profileHTML = fs.readFileSync(new URL("../html/createProfile.html", import.meta.url), {encoding: "utf8"});
const vncHTML = fs.readFileSync(new URL("../html/vnc_lite.html", import.meta.url), {encoding: "utf8"});
const profileHTML = fs.readFileSync(
new URL("../html/createProfile.html", import.meta.url),
{ encoding: "utf8" },
);
const vncHTML = fs.readFileSync(
new URL("../html/vnc_lite.html", import.meta.url),
{ encoding: "utf8" },
);
const behaviors = fs.readFileSync(new URL("../node_modules/browsertrix-behaviors/dist/behaviors.js", import.meta.url), {encoding: "utf8"});
const behaviors = fs.readFileSync(
new URL(
"../node_modules/browsertrix-behaviors/dist/behaviors.js",
import.meta.url,
),
{ encoding: "utf8" },
);
function cliOpts(): { [key: string]: Options } {
function cliOpts(): { [key: string]: Options } {
return {
"url": {
url: {
describe: "The URL of the login page",
type: "string",
demandOption: true,
},
"user": {
describe: "The username for the login. If not specified, will be prompted",
user: {
describe:
"The username for the login. If not specified, will be prompted",
},
"password": {
describe: "The password for the login. If not specified, will be prompted (recommended)",
password: {
describe:
"The password for the login. If not specified, will be prompted (recommended)",
},
"filename": {
filename: {
describe: "The filename for the profile tarball",
default: "/crawls/profiles/profile.tar.gz",
},
"debugScreenshot": {
describe: "If specified, take a screenshot after login and save as this filename"
debugScreenshot: {
describe:
"If specified, take a screenshot after login and save as this filename",
},
"headless": {
headless: {
describe: "Run in headless mode, otherwise start xvfb",
type: "boolean",
default: false,
},
"automated": {
automated: {
describe: "Start in automated mode, no interactive browser",
type: "boolean",
default: false,
},
"interactive": {
interactive: {
describe: "Deprecated. Now the default option!",
type: "boolean",
default: false
default: false,
},
"shutdownWait": {
describe: "Shutdown browser in interactive after this many seconds, if no pings received",
shutdownWait: {
describe:
"Shutdown browser in interactive after this many seconds, if no pings received",
type: "number",
default: 0
default: 0,
},
"profile": {
describe: "Path to tar.gz file which will be extracted and used as the browser profile",
profile: {
describe:
"Path to tar.gz file which will be extracted and used as the browser profile",
type: "string",
},
"windowSize": {
windowSize: {
type: "string",
describe: "Browser window dimensions, specified as: width,height",
default: getDefaultWindowSize()
default: getDefaultWindowSize(),
},
"proxy": {
proxy: {
type: "boolean",
default: false
default: false,
},
"cookieDays": {
cookieDays: {
type: "number",
describe: "If >0, set all cookies, including session cookies, to have this duration in days before saving profile",
default: 7
}
describe:
"If >0, set all cookies, including session cookies, to have this duration in days before saving profile",
default: 7,
},
};
}
@ -100,14 +118,11 @@ function getDefaultWindowSize() {
return `${x},${y}`;
}
async function main() {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const params : any = yargs(process.argv)
const params: any = yargs(process.argv)
.usage("browsertrix-crawler profile [options]")
.option(cliOpts())
.argv;
.option(cliOpts()).argv;
logger.setDebugLogging(true);
@ -122,7 +137,7 @@ async function main() {
process.env.GEOMETRY || "",
"-ac",
"+extension",
"RANDR"
"RANDR",
]);
//await fsp.mkdir(path.join(homedir(), ".vnc"), {recursive: true});
@ -140,7 +155,7 @@ async function main() {
"-passwd",
process.env.VNC_PASS || "",
"-display",
process.env.DISPLAY || ""
process.env.DISPLAY || "",
]);
}
@ -156,13 +171,15 @@ async function main() {
"--window-position=0,0",
`--window-size=${params.windowSize}`,
// to disable the 'stability will suffer' infobar
"--test-type"
]
}
"--test-type",
],
},
});
if (params.interactive) {
logger.warn("Note: the '--interactive' flag is now deprecated and is the default profile creation option. Use the --automated flag to specify non-interactive mode");
logger.warn(
"Note: the '--interactive' flag is now deprecated and is the default profile creation option. Use the --automated flag to specify non-interactive mode",
);
}
if (params.user || params.password) {
@ -179,20 +196,23 @@ async function main() {
const { page, cdp } = await browser.newWindowPageWithCDP();
const waitUntil : PuppeteerLifeCycleEvent = "load";
const waitUntil: PuppeteerLifeCycleEvent = "load";
await page.setCacheEnabled(false);
if (!params.automated) {
await browser.setupPage({page, cdp});
await browser.setupPage({ page, cdp });
// for testing, inject browsertrix-behaviors
await browser.addInitScript(page, behaviors + ";\nself.__bx_behaviors.init();");
await browser.addInitScript(
page,
behaviors + ";\nself.__bx_behaviors.init();",
);
}
logger.info(`Loading page: ${params.url}`);
await page.goto(params.url, {waitUntil});
await page.goto(params.url, { waitUntil });
if (!params.automated) {
const target = await cdp.send("Target.getTargetInfo");
@ -204,20 +224,29 @@ async function main() {
}
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function automatedProfile(params: any, browser: Browser, page: Page, cdp: CDPSession,
waitUntil: PuppeteerLifeCycleEvent) {
async function automatedProfile(
// TODO: Fix this the next time the file is edited.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
params: any,
browser: Browser,
page: Page,
cdp: CDPSession,
waitUntil: PuppeteerLifeCycleEvent,
) {
let u, p;
logger.debug("Looking for username and password entry fields on page...");
try {
u = await page.waitForSelector("//input[contains(@name, 'user') or contains(@name, 'email')]");
p = await page.waitForSelector("//input[contains(@name, 'pass') and @type='password']");
u = await page.waitForSelector(
"//input[contains(@name, 'user') or contains(@name, 'email')]",
);
p = await page.waitForSelector(
"//input[contains(@name, 'pass') and @type='password']",
);
} catch (e) {
if (params.debugScreenshot) {
await page.screenshot({path: params.debugScreenshot});
await page.screenshot({ path: params.debugScreenshot });
}
logger.debug("Login form could not be found");
await page.close();
@ -231,11 +260,11 @@ async function automatedProfile(params: any, browser: Browser, page: Page, cdp:
await Promise.allSettled([
p!.press("Enter"),
page.waitForNavigation({waitUntil})
page.waitForNavigation({ waitUntil }),
]);
if (params.debugScreenshot) {
await page.screenshot({path: params.debugScreenshot});
await page.screenshot({ path: params.debugScreenshot });
}
await createProfile(params, browser, page, cdp);
@ -243,8 +272,15 @@ async function automatedProfile(params: any, browser: Browser, page: Page, cdp:
process.exit(0);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function createProfile(params: any, browser: Browser, page: Page, cdp: CDPSession, targetFilename = "") {
async function createProfile(
// TODO: Fix this the next time the file is edited.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
params: any,
browser: Browser,
page: Page,
cdp: CDPSession,
targetFilename = "",
) {
await cdp.send("Network.clearBrowserCache");
await browser.close();
@ -252,10 +288,10 @@ async function createProfile(params: any, browser: Browser, page: Page, cdp: CDP
logger.info("Creating profile");
const profileFilename = params.filename || "/crawls/profiles/profile.tar.gz";
const outputDir = path.dirname(profileFilename);
if (outputDir && !fs.existsSync(outputDir)) {
fs.mkdirSync(outputDir, {recursive: true});
fs.mkdirSync(outputDir, { recursive: true });
}
browser.saveProfile(profileFilename);
@ -274,9 +310,9 @@ async function createProfile(params: any, browser: Browser, page: Page, cdp: CDP
function promptInput(msg: string, hidden = false) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const rl : any = readline.createInterface({
const rl: any = readline.createInterface({
input: process.stdin,
output: process.stdout
output: process.stdout,
});
if (hidden) {
@ -303,7 +339,6 @@ function promptInput(msg: string, hidden = false) {
});
}
class InteractiveBrowser {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
params: any;
@ -323,7 +358,7 @@ class InteractiveBrowser {
browser: Browser,
page: Page,
cdp: CDPSession,
targetId: string
targetId: string,
) {
logger.info("Creating Profile Interactively...");
child_process.spawn("socat", [
@ -359,19 +394,19 @@ class InteractiveBrowser {
if (this.shutdownWait) {
this.shutdownTimer = setTimeout(() => process.exit(0), this.shutdownWait);
logger.debug(
`Shutting down in ${this.shutdownWait}ms if no ping received`
`Shutting down in ${this.shutdownWait}ms if no ping received`,
);
} else {
this.shutdownTimer = null;
}
const httpServer = http.createServer((req, res) =>
this.handleRequest(req, res)
this.handleRequest(req, res),
);
const port = 9223;
httpServer.listen(port);
logger.info(
`Browser Profile UI Server started. Load http://localhost:${port}/ to interact with a Chromium-based browser, click 'Create Profile' when done.`
`Browser Profile UI Server started. Load http://localhost:${port}/ to interact with a Chromium-based browser, click 'Create Profile' when done.`,
);
if (!params.headless) {
@ -442,141 +477,141 @@ class InteractiveBrowser {
let origins;
switch (pathname) {
case "/":
res.writeHead(200, { "Content-Type": "text/html" });
if (this.params.headless) {
targetUrl = `http://$HOST:9222/devtools/inspector.html?ws=$HOST:9222/devtools/page/${this.targetId}&panel=resources`;
} else {
targetUrl = `http://$HOST:9223/vnc/?host=$HOST&port=6080&password=${process.env.VNC_PASS}`;
}
res.end(
profileHTML.replace(
"$DEVTOOLS_SRC",
targetUrl.replaceAll("$HOST", parsedUrl.hostname)
)
);
return;
case "/vnc/":
case "/vnc/index.html":
res.writeHead(200, { "Content-Type": "text/html" });
res.end(vncHTML);
return;
case "/ping":
if (this.shutdownWait) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
clearTimeout(this.shutdownTimer as any);
this.shutdownTimer = setTimeout(
() => process.exit(0),
this.shutdownWait
case "/":
res.writeHead(200, { "Content-Type": "text/html" });
if (this.params.headless) {
targetUrl = `http://$HOST:9222/devtools/inspector.html?ws=$HOST:9222/devtools/page/${this.targetId}&panel=resources`;
} else {
targetUrl = `http://$HOST:9223/vnc/?host=$HOST&port=6080&password=${process.env.VNC_PASS}`;
}
res.end(
profileHTML.replace(
"$DEVTOOLS_SRC",
targetUrl.replaceAll("$HOST", parsedUrl.hostname),
),
);
logger.debug(
`Ping received, delaying shutdown for ${this.shutdownWait}ms`
);
}
return;
origins = Array.from(this.originSet.values());
case "/vnc/":
case "/vnc/index.html":
res.writeHead(200, { "Content-Type": "text/html" });
res.end(vncHTML);
return;
res.writeHead(200, { "Content-Type": "application/json" });
case "/ping":
if (this.shutdownWait) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
clearTimeout(this.shutdownTimer as any);
this.shutdownTimer = setTimeout(
() => process.exit(0),
this.shutdownWait,
);
logger.debug(
`Ping received, delaying shutdown for ${this.shutdownWait}ms`,
);
}
res.end(JSON.stringify({ pong: true, origins }));
return;
case "/target":
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ targetId: this.targetId }));
return;
case "/vncpass":
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ password: process.env.VNC_PASS }));
return;
case "/navigate":
if (req.method !== "POST") {
break;
}
try {
const postData = await this.readBodyJson(req);
const url = new URL(postData.url).href;
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ success: true }));
this.page.goto(url);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(400, { "Content-Type": "application/json" });
res.end(JSON.stringify({ error: e.toString() }));
logger.warn("HTTP Error", e);
}
return;
case "/createProfileJS":
if (req.method !== "POST") {
break;
}
try {
const postData = await this.readBodyJson(req);
const targetFilename = postData.filename || "";
await this.saveAllCookies();
const resource = await createProfile(
this.params,
this.browser,
this.page,
this.cdp,
targetFilename
);
origins = Array.from(this.originSet.values());
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ resource, origins }));
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(500, { "Content-Type": "application/json" });
res.end(JSON.stringify({ error: e.toString() }));
logger.warn("HTTP Error", e);
}
setTimeout(() => process.exit(0), 200);
return;
res.end(JSON.stringify({ pong: true, origins }));
return;
case "/createProfile":
if (req.method !== "POST") {
break;
}
case "/target":
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ targetId: this.targetId }));
return;
try {
await this.saveAllCookies();
case "/vncpass":
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ password: process.env.VNC_PASS }));
return;
await createProfile(this.params, this.browser, this.page, this.cdp);
case "/navigate":
if (req.method !== "POST") {
break;
}
res.writeHead(200, { "Content-Type": "text/html" });
res.end(
"<html><body>Profile Created! You may now close this window.</body></html>"
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(500, { "Content-Type": "text/html" });
res.end(
"<html><body>Profile creation failed! See the browsertrix-crawler console for more info"
);
logger.warn("HTTP Error", e);
}
try {
const postData = await this.readBodyJson(req);
const url = new URL(postData.url).href;
setTimeout(() => process.exit(0), 200);
return;
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ success: true }));
this.page.goto(url);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(400, { "Content-Type": "application/json" });
res.end(JSON.stringify({ error: e.toString() }));
logger.warn("HTTP Error", e);
}
return;
case "/createProfileJS":
if (req.method !== "POST") {
break;
}
try {
const postData = await this.readBodyJson(req);
const targetFilename = postData.filename || "";
await this.saveAllCookies();
const resource = await createProfile(
this.params,
this.browser,
this.page,
this.cdp,
targetFilename,
);
origins = Array.from(this.originSet.values());
res.writeHead(200, { "Content-Type": "application/json" });
res.end(JSON.stringify({ resource, origins }));
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(500, { "Content-Type": "application/json" });
res.end(JSON.stringify({ error: e.toString() }));
logger.warn("HTTP Error", e);
}
setTimeout(() => process.exit(0), 200);
return;
case "/createProfile":
if (req.method !== "POST") {
break;
}
try {
await this.saveAllCookies();
await createProfile(this.params, this.browser, this.page, this.cdp);
res.writeHead(200, { "Content-Type": "text/html" });
res.end(
"<html><body>Profile Created! You may now close this window.</body></html>",
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} catch (e: any) {
res.writeHead(500, { "Content-Type": "text/html" });
res.end(
"<html><body>Profile creation failed! See the browsertrix-crawler console for more info",
);
logger.warn("HTTP Error", e);
}
setTimeout(() => process.exit(0), 200);
return;
}
if (pathname.startsWith("/vnc/")) {
const fileUrl = new URL(
"../node_modules/@novnc/novnc/" + pathname.slice("/vnc/".length),
import.meta.url
import.meta.url,
);
const file = fs.readFileSync(fileUrl, { encoding: "utf-8" });
res.writeHead(200, { "Content-Type": "application/javascript" });
@ -607,6 +642,4 @@ class InteractiveBrowser {
}
}
main();