mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
Support host-specific proxies with proxy config YAML (#837)
- Adds support for a YAML-based config for multiple proxies, containing a 'matchHosts' section (hosts matched by regex) and a 'proxies' declaration, allowing any number of hosts to be matched to any number of named proxies.
- Specified via the --proxyServerConfig option, which can be passed to both the crawl and profile creation commands.
- Implemented internally by generating a proxy PAC script that does the regex matching, and running the browser with that PAC script served by an internal HTTP server.
- Also supports matching different undici Agents by regex, for using different proxies with direct fetching.
- Precedence: --proxyServerConfig takes precedence over --proxyServer / PROXY_SERVER, unless --proxyServerPreferSingleProxy is also provided.
- Updated the proxies doc section with an example.
- Updated tests with sample bad and good auth examples of proxy config.

Fixes #836

---------

Co-authored-by: Tessa Walsh <tessa@bitarchivist.net>
This commit is contained in:
parent
a6ad6a0e42
commit
a42c0b926e
19 changed files with 424 additions and 68 deletions
|
@ -16,7 +16,7 @@ import { initStorage } from "./util/storage.js";
|
|||
import { CDPSession, Page, PuppeteerLifeCycleEvent } from "puppeteer-core";
|
||||
import { getInfoString } from "./util/file_reader.js";
|
||||
import { DISPLAY, ExitCodes } from "./util/constants.js";
|
||||
import { initProxy } from "./util/proxy.js";
|
||||
import { initProxy, loadProxyConfig } from "./util/proxy.js";
|
||||
//import { sleep } from "./util/timing.js";
|
||||
|
||||
const profileHTML = fs.readFileSync(
|
||||
|
@ -123,6 +123,12 @@ function initArgs() {
|
|||
type: "string",
|
||||
},
|
||||
|
||||
proxyServerConfig: {
|
||||
describe:
|
||||
"if set, path to yaml/json file that configures multiple path servers per URL regex",
|
||||
type: "string",
|
||||
},
|
||||
|
||||
sshProxyPrivateKeyFile: {
|
||||
describe:
|
||||
"path to SSH private key for SOCKS5 over SSH proxy connection",
|
||||
|
@ -161,7 +167,9 @@ async function main() {
|
|||
|
||||
process.on("SIGTERM", () => handleTerminate("SIGTERM"));
|
||||
|
||||
const proxyServer = await initProxy(params, false);
|
||||
loadProxyConfig(params);
|
||||
|
||||
const { proxyServer, proxyPacUrl } = await initProxy(params, false);
|
||||
|
||||
if (!params.headless) {
|
||||
logger.debug("Launching XVFB");
|
||||
|
@ -203,7 +211,8 @@ async function main() {
|
|||
headless: params.headless,
|
||||
signals: false,
|
||||
chromeOptions: {
|
||||
proxy: proxyServer,
|
||||
proxyServer,
|
||||
proxyPacUrl,
|
||||
extraArgs: [
|
||||
"--window-position=0,0",
|
||||
`--window-size=${params.windowSize}`,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue