mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Compare commits
3 commits
2270964996
...
6f26148a9b
Author | SHA1 | Date | |
---|---|---|---|
![]() |
6f26148a9b | ||
![]() |
4f234040ce | ||
![]() |
002feb287b |
5 changed files with 44 additions and 37 deletions
|
@ -1,6 +1,6 @@
|
||||||
{
|
{
|
||||||
"name": "browsertrix-crawler",
|
"name": "browsertrix-crawler",
|
||||||
"version": "1.8.0",
|
"version": "1.8.1",
|
||||||
"main": "browsertrix-crawler",
|
"main": "browsertrix-crawler",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||||
|
|
|
@ -852,31 +852,34 @@ self.__bx_behaviors.selectMainBehavior();
|
||||||
await this.browser.addInitScript(page, initScript);
|
await this.browser.addInitScript(page, initScript);
|
||||||
}
|
}
|
||||||
|
|
||||||
// only add if running with autoclick behavior
|
// Handle JS dialogs:
|
||||||
if (this.params.behaviors.includes("autoclick")) {
|
// - Ensure off-page navigation is canceled while behavior is running
|
||||||
// Ensure off-page navigation is canceled while behavior is running
|
// - Dismiss (close) all other dialogs, i.e. those that are not beforeunload prompts
|
||||||
page.on("dialog", async (dialog) => {
|
page.on("dialog", async (dialog) => {
|
||||||
let accepted = true;
|
let accepted = true;
|
||||||
if (dialog.type() === "beforeunload") {
|
if (dialog.type() === "beforeunload") {
|
||||||
if (opts.pageBlockUnload) {
|
if (opts.pageBlockUnload) {
|
||||||
accepted = false;
|
accepted = false;
|
||||||
await dialog.dismiss();
|
await dialog.dismiss();
|
||||||
} else {
|
|
||||||
await dialog.accept();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
await dialog.accept();
|
await dialog.accept();
|
||||||
}
|
}
|
||||||
logger.debug("JS Dialog", {
|
} else {
|
||||||
accepted,
|
// other JS dialog, just dismiss
|
||||||
blockingUnload: opts.pageBlockUnload,
|
await dialog.dismiss();
|
||||||
message: dialog.message(),
|
}
|
||||||
type: dialog.type(),
|
logger.debug("JS Dialog", {
|
||||||
page: page.url(),
|
accepted,
|
||||||
workerid,
|
blockingUnload: opts.pageBlockUnload,
|
||||||
});
|
message: dialog.message(),
|
||||||
|
type: dialog.type(),
|
||||||
|
page: page.url(),
|
||||||
|
workerid,
|
||||||
});
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// only add if running with autoclick behavior
|
||||||
|
if (this.params.behaviors.includes("autoclick")) {
|
||||||
// Close any windows opened during navigation from autoclick
|
// Close any windows opened during navigation from autoclick
|
||||||
await cdp.send("Target.setDiscoverTargets", { discover: true });
|
await cdp.send("Target.setDiscoverTargets", { discover: true });
|
||||||
|
|
||||||
|
|
|
@ -339,7 +339,11 @@ async function createProfile(
|
||||||
cdp: CDPSession,
|
cdp: CDPSession,
|
||||||
targetFilename = "",
|
targetFilename = "",
|
||||||
) {
|
) {
|
||||||
await cdp.send("Network.clearBrowserCache");
|
try {
|
||||||
|
await cdp.send("Network.clearBrowserCache");
|
||||||
|
} catch (e) {
|
||||||
|
logger.warn("Error clearing cache", e, "browser");
|
||||||
|
}
|
||||||
|
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|
||||||
|
@ -546,7 +550,8 @@ class InteractiveBrowser {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const cookies = await this.browser.getCookies(this.page);
|
const cookies = await this.browser.getCookies();
|
||||||
|
|
||||||
for (const cookieOrig of cookies) {
|
for (const cookieOrig of cookies) {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
const cookie = cookieOrig as any;
|
const cookie = cookieOrig as any;
|
||||||
|
@ -566,7 +571,7 @@ class InteractiveBrowser {
|
||||||
cookie.url = url;
|
cookie.url = url;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
await this.browser.setCookies(this.page, cookies);
|
await this.browser.setCookies(cookies);
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
} catch (e: any) {
|
} catch (e: any) {
|
||||||
logger.error("Save Cookie Error: ", e);
|
logger.error("Save Cookie Error: ", e);
|
||||||
|
|
|
@ -22,6 +22,7 @@ import puppeteer, {
|
||||||
Page,
|
Page,
|
||||||
LaunchOptions,
|
LaunchOptions,
|
||||||
Viewport,
|
Viewport,
|
||||||
|
CookieData,
|
||||||
} from "puppeteer-core";
|
} from "puppeteer-core";
|
||||||
import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
|
import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
|
||||||
import { Recorder } from "./recorder.js";
|
import { Recorder } from "./recorder.js";
|
||||||
|
@ -616,14 +617,12 @@ export class Browser {
|
||||||
await page.setViewport(params);
|
await page.setViewport(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
async getCookies(page: Page) {
|
async getCookies() {
|
||||||
return await page.cookies();
|
return (await this.browser?.cookies()) || [];
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Fix this the next time the file is edited.
|
async setCookies(cookies: CookieData[]) {
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
return await this.browser?.setCookie(...cookies);
|
||||||
async setCookies(page: Page, cookies: any) {
|
|
||||||
return await page.setCookie(...cookies);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -153,13 +153,6 @@ export async function initProxy(
|
||||||
privateKeyFile = privateKeyFile || sshProxyPrivateKeyFile;
|
privateKeyFile = privateKeyFile || sshProxyPrivateKeyFile;
|
||||||
publicHostsFile = publicHostsFile || sshProxyKnownHostsFile;
|
publicHostsFile = publicHostsFile || sshProxyKnownHostsFile;
|
||||||
|
|
||||||
logger.debug("Initing proxy", {
|
|
||||||
url: getSafeProxyString(proxyUrl),
|
|
||||||
localPort,
|
|
||||||
privateKeyFile,
|
|
||||||
publicHostsFile,
|
|
||||||
});
|
|
||||||
|
|
||||||
const entry = await initSingleProxy(
|
const entry = await initSingleProxy(
|
||||||
proxyUrl,
|
proxyUrl,
|
||||||
localPort++,
|
localPort++,
|
||||||
|
@ -200,6 +193,13 @@ export async function initSingleProxy(
|
||||||
sshProxyPrivateKeyFile?: string,
|
sshProxyPrivateKeyFile?: string,
|
||||||
sshProxyKnownHostsFile?: string,
|
sshProxyKnownHostsFile?: string,
|
||||||
): Promise<{ proxyUrl: string; dispatcher: Dispatcher }> {
|
): Promise<{ proxyUrl: string; dispatcher: Dispatcher }> {
|
||||||
|
logger.debug("Initing proxy", {
|
||||||
|
url: getSafeProxyString(proxyUrl),
|
||||||
|
localPort,
|
||||||
|
sshProxyPrivateKeyFile,
|
||||||
|
sshProxyKnownHostsFile,
|
||||||
|
});
|
||||||
|
|
||||||
if (proxyUrl && proxyUrl.startsWith("ssh://")) {
|
if (proxyUrl && proxyUrl.startsWith("ssh://")) {
|
||||||
proxyUrl = await runSSHD(
|
proxyUrl = await runSSHD(
|
||||||
proxyUrl,
|
proxyUrl,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue