mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
SOCKS5 over SSH Tunnel Support (#671)
- Adds support for running a SOCKS5 proxy over an SSH connection. This can be configured by using `--proxyServer ssh://user@host[:port]` config and also passing an `--sshProxyPrivateKeyFile <private key file>` file param and an optional `--sshProxyKnownHostsFile <public host key file>` file param. The key files are expected to be mounted as volumes into the crawler. - Same arguments are also available for create-login-profile - The proxy config uses autossh to establish a more robust connection, and also waits until a connection can be established before proceeding. - Docs are updated to include a new 'Crawling with Proxies' page in the user guide - Tests are updated to include crawling through an SSH proxy running locally. --------- Co-authored-by: Vinzenz Sinapius <Vinzenz.Sinapius@gmail.com>
This commit is contained in:
parent
39c8f48bb2
commit
8934feaf70
12 changed files with 347 additions and 37 deletions
|
@ -91,6 +91,8 @@ code {
|
|||
border-width: 1px;
|
||||
border-color: #d1d5db;
|
||||
border-style: solid;
|
||||
|
||||
white-space : pre-wrap !important;
|
||||
}
|
||||
|
||||
.md-typeset h1,
|
||||
|
|
|
@ -94,15 +94,15 @@ Options:
|
|||
, "state", "redis", "storage", "text", "exclusion", "screenshots", "screencast
|
||||
", "originOverride", "healthcheck", "browser", "blocking", "behavior", "behavi
|
||||
orScript", "jsError", "fetch", "pageStatus", "memoryStatus", "crawlStatus", "l
|
||||
inks", "sitemap", "replay"] [default: []]
|
||||
inks", "sitemap", "replay", "proxy"] [default: []]
|
||||
--logExcludeContext Comma-separated list of contexts to
|
||||
NOT include in logs
|
||||
[array] [choices: "general", "worker", "recorder", "recorderNetwork", "writer"
|
||||
, "state", "redis", "storage", "text", "exclusion", "screenshots", "screencast
|
||||
", "originOverride", "healthcheck", "browser", "blocking", "behavior", "behavi
|
||||
orScript", "jsError", "fetch", "pageStatus", "memoryStatus", "crawlStatus", "l
|
||||
inks", "sitemap", "replay"] [default: ["recorderNetwork","jsError","screencast
|
||||
"]]
|
||||
inks", "sitemap", "replay", "proxy"] [default: ["recorderNetwork","jsError","s
|
||||
creencast"]]
|
||||
--text Extract initial (default) or final t
|
||||
ext to pages.jsonl or WARC resource
|
||||
record(s)
|
||||
|
@ -271,6 +271,11 @@ Options:
|
|||
--qaDebugImageDiff if specified, will write crawl.png,
|
||||
replay.png and diff.png for each pag
|
||||
e where they're different [boolean]
|
||||
--sshProxyPrivateKeyFile path to SSH private key for SOCKS5 o
|
||||
ver SSH proxy connection [string]
|
||||
--sshProxyKnownHostsFile path to SSH known hosts file for SOC
|
||||
KS5 over SSH proxy connection
|
||||
[string]
|
||||
--config Path to YAML config file
|
||||
```
|
||||
|
||||
|
@ -278,33 +283,37 @@ Options:
|
|||
|
||||
```
|
||||
Options:
|
||||
--help Show help [boolean]
|
||||
--version Show version number [boolean]
|
||||
--url The URL of the login page [string] [required]
|
||||
--user The username for the login. If not specified, will be promp
|
||||
ted
|
||||
--password The password for the login. If not specified, will be promp
|
||||
ted (recommended)
|
||||
--filename The filename for the profile tarball, stored within /crawls
|
||||
/profiles if absolute path not provided
|
||||
--help Show help [boolean]
|
||||
--version Show version number [boolean]
|
||||
--url The URL of the login page [string] [required]
|
||||
--user The username for the login. If not specified, will b
|
||||
e prompted
|
||||
--password The password for the login. If not specified, will b
|
||||
e prompted (recommended)
|
||||
--filename The filename for the profile tarball, stored within
|
||||
/crawls/profiles if absolute path not provided
|
||||
[default: "/crawls/profiles/profile.tar.gz"]
|
||||
--debugScreenshot If specified, take a screenshot after login and save as thi
|
||||
s filename
|
||||
--headless Run in headless mode, otherwise start xvfb
|
||||
--debugScreenshot If specified, take a screenshot after login and save
|
||||
as this filename
|
||||
--headless Run in headless mode, otherwise start xvfb
|
||||
[boolean] [default: false]
|
||||
--automated Start in automated mode, no interactive browser
|
||||
--automated Start in automated mode, no interactive browser
|
||||
[boolean] [default: false]
|
||||
--interactive Deprecated. Now the default option!
|
||||
--interactive Deprecated. Now the default option!
|
||||
[boolean] [default: false]
|
||||
--shutdownWait Shutdown browser in interactive after this many seconds, if
|
||||
no pings received [number] [default: 0]
|
||||
--profile Path or HTTP(S) URL to tar.gz file which contains the brows
|
||||
er profile directory [string]
|
||||
--windowSize Browser window dimensions, specified as: width,height
|
||||
[string] [default: "1360,1020"]
|
||||
--proxyServer if set, will use specified proxy server. Takes precedence o
|
||||
ver any env var proxy settings [string]
|
||||
--cookieDays If >0, set all cookies, including session cookies, to have
|
||||
this duration in days before saving profile
|
||||
--shutdownWait Shutdown browser in interactive after this many seco
|
||||
nds, if no pings received [number] [default: 0]
|
||||
--profile Path or HTTP(S) URL to tar.gz file which contains th
|
||||
e browser profile directory [string]
|
||||
--windowSize Browser window dimensions, specified as: width,heigh
|
||||
t [string] [default: "1360,1020"]
|
||||
--cookieDays If >0, set all cookies, including session cookies, t
|
||||
o have this duration in days before saving profile
|
||||
[number] [default: 7]
|
||||
--proxyServer if set, will use specified proxy server. Takes prece
|
||||
dence over any env var proxy settings [string]
|
||||
--sshProxyPrivateKeyFile path to SSH private key for SOCKS5 over SSH proxy co
|
||||
nnection [string]
|
||||
--sshProxyKnownHostsFile path to SSH known hosts file for SOCKS5 over SSH pro
|
||||
xy connection [string]
|
||||
```
|
||||
|
|
86
docs/docs/user-guide/proxies.md
Normal file
86
docs/docs/user-guide/proxies.md
Normal file
|
@ -0,0 +1,86 @@
|
|||
# Crawling with Proxies
|
||||
Browsertrix Crawler supports crawling through HTTP and SOCKS5 proxies, including through a SOCKS5 proxy over an SSH tunnel.
|
||||
|
||||
To specify a proxy, the `PROXY_SERVER` environment variable or `--proxyServer` CLI flag can be passed in.
|
||||
If both are provided, the `--proxyServer` CLI flag will take precedence.
|
||||
|
||||
The proxy server can be specified as a `http://`, `socks5://`, or `ssh://` URL.
|
||||
|
||||
### HTTP Proxies
|
||||
|
||||
To crawl through an HTTP proxy running at `http://path-to-proxy-host.example.com:9000`, run the crawler with:
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ -e PROXY_SERVER=http://path-to-proxy-host.example.com:9000 webrecorder/browsertrix-crawler crawl --url https://example.com/
|
||||
```
|
||||
|
||||
or
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ webrecorder/browsertrix-crawler crawl --url https://example.com/ --proxyServer http://path-to-proxy-host.example.com:9000
|
||||
```
|
||||
|
||||
The crawler *does not* support authentication for HTTP proxies, as that is not supported by the browser.
|
||||
|
||||
(For backwards compatibility with crawler 0.x, `PROXY_HOST` and `PROXY_PORT` environment variables can be used to specify an HTTP proxy instead of `PROXY_SERVER`
|
||||
which takes precedence if provided).
|
||||
|
||||
|
||||
### SOCKS5 Proxies
|
||||
|
||||
To use a SOCKS5 proxy running at `path-to-proxy-host.example.com:9001`, run the crawler with:
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ -e PROXY_SERVER=socks5://path-to-proxy-host.example.com:9001 webrecorder/browsertrix-crawler crawl --url https://example.com/
|
||||
```
|
||||
|
||||
The crawler *does* support password authentication for SOCKS5 proxies, which can be provided as `user:password` in the proxy URL:
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ -e PROXY_SERVER=socks5://user:password@path-to-proxy-host.example.com:9001 webrecorder/browsertrix-crawler crawl --url https://example.com/
|
||||
```
|
||||
|
||||
### SSH Proxies
|
||||
|
||||
Starting with 1.3.0, the crawler also supports crawling through a SOCKS5 proxy that is established over an SSH tunnel, via `ssh -D`.
|
||||
With this option, the crawler can SSH into a remote machine that has SSH and port forwarding enabled and crawl through that machine's network.
|
||||
|
||||
To use this proxy, the private SSH key file must be provided via `--sshProxyPrivateKeyFile` CLI flag.
|
||||
|
||||
The private key and public host key should be mounted as volumes into a path in the container, as shown below.
|
||||
|
||||
For example, to connect via SSH to host `path-to-ssh-host.example.com` as user `user` with private key stored in `./my-proxy-private-key`, run:
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ -v $PWD/my-proxy-private-key:/tmp/private-key webrecorder/browsertrix-crawler crawl --url https://httpbin.org/ip --proxyServer ssh://user@path-to-ssh-host.example.com --sshProxyPrivateKeyFile /tmp/private-key
|
||||
```
|
||||
|
||||
To also provide the host public key (eg. `./known_hosts` file) for additional verification, run:
|
||||
|
||||
```sh
|
||||
docker run -v $PWD/crawls/:/crawls/ -v $PWD/my-proxy-private-key:/tmp/private-key -v $PWD/known_hosts:/tmp/known_hosts webrecorder/browsertrix-crawler crawl --url https://httpbin.org/ip --proxyServer ssh://user@path-to-ssh-host.example.com --sshProxyPrivateKeyFile /tmp/private-key --sshProxyKnownHostsFile /tmp/known_hosts
|
||||
```
|
||||
|
||||
The host key will only be checked if provided in a file via: `--sshProxyKnownHostsFile`.
|
||||
|
||||
A custom SSH port can be provided with `--proxyServer ssh://user@path-to-ssh-host.example.com:2222`, otherwise the
|
||||
connection will be attempted via the default SSH port (port 22).
|
||||
|
||||
The SSH connection establishes a tunnel on a local port in the container (9722) which will forward inbound/outbound traffic through the remote proxy.
|
||||
The `autossh` utility is used to automatically restart the SSH connection, if needed.
|
||||
|
||||
Only key-based authentication is supported for SSH proxies for now.
|
||||
|
||||
|
||||
## Browser Profiles
|
||||
|
||||
The above proxy settings also apply to [Browser Profile Creation](../browser-profiles), and browser profiles can also be created using proxies, for example:
|
||||
|
||||
```sh
|
||||
docker run -p 6080:6080 -p 9223:9223 -v $PWD/crawls/profiles:/crawls/profiles -v $PWD/my-proxy-private-key:/tmp/private-key -v $PWD/known_hosts:/tmp/known_hosts webrecorder/browsertrix-crawler create-login-profile --url https://example.com/ --proxyServer ssh://user@path-to-ssh-host.example.com --sshProxyPrivateKeyFile /tmp/private-key --sshProxyKnownHostsFile /tmp/known_hosts
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -54,6 +54,7 @@ nav:
|
|||
- user-guide/crawl-scope.md
|
||||
- user-guide/yaml-config.md
|
||||
- user-guide/browser-profiles.md
|
||||
- user-guide/proxies.md
|
||||
- user-guide/behaviors.md
|
||||
- user-guide/qa.md
|
||||
- user-guide/cli-options.md
|
||||
|
|
|
@ -456,8 +456,6 @@ export class Crawler {
|
|||
async bootstrap() {
|
||||
const subprocesses: ChildProcess[] = [];
|
||||
|
||||
this.proxyServer = initProxy(this.params.proxyServer);
|
||||
|
||||
const redisUrl = this.params.redisStoreUrl || "redis://localhost:6379/0";
|
||||
|
||||
if (
|
||||
|
@ -482,6 +480,8 @@ export class Crawler {
|
|||
setWARCInfo(this.infoString, this.params.warcInfo);
|
||||
logger.info(this.infoString);
|
||||
|
||||
this.proxyServer = await initProxy(this.params, RUN_DETACHED);
|
||||
|
||||
logger.info("Seeds", this.seeds);
|
||||
|
||||
if (this.params.behaviorOpts) {
|
||||
|
|
|
@ -16,6 +16,7 @@ import { initStorage } from "./util/storage.js";
|
|||
import { CDPSession, Page, PuppeteerLifeCycleEvent } from "puppeteer-core";
|
||||
import { getInfoString } from "./util/file_reader.js";
|
||||
import { DISPLAY } from "./util/constants.js";
|
||||
import { initProxy } from "./util/proxy.js";
|
||||
|
||||
const profileHTML = fs.readFileSync(
|
||||
new URL("../html/createProfile.html", import.meta.url),
|
||||
|
@ -100,17 +101,28 @@ function cliOpts(): { [key: string]: Options } {
|
|||
default: getDefaultWindowSize(),
|
||||
},
|
||||
|
||||
cookieDays: {
|
||||
type: "number",
|
||||
describe:
|
||||
"If >0, set all cookies, including session cookies, to have this duration in days before saving profile",
|
||||
default: 7,
|
||||
},
|
||||
|
||||
proxyServer: {
|
||||
describe:
|
||||
"if set, will use specified proxy server. Takes precedence over any env var proxy settings",
|
||||
type: "string",
|
||||
},
|
||||
|
||||
cookieDays: {
|
||||
type: "number",
|
||||
sshProxyPrivateKeyFile: {
|
||||
describe: "path to SSH private key for SOCKS5 over SSH proxy connection",
|
||||
type: "string",
|
||||
},
|
||||
|
||||
sshProxyKnownHostsFile: {
|
||||
describe:
|
||||
"If >0, set all cookies, including session cookies, to have this duration in days before saving profile",
|
||||
default: 7,
|
||||
"path to SSH known hosts file for SOCKS5 over SSH proxy connection",
|
||||
type: "string",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
@ -141,6 +153,8 @@ async function main() {
|
|||
|
||||
process.on("SIGTERM", () => handleTerminate("SIGTERM"));
|
||||
|
||||
const proxyServer = await initProxy(params, false);
|
||||
|
||||
if (!params.headless) {
|
||||
logger.debug("Launching XVFB");
|
||||
child_process.spawn("Xvfb", [
|
||||
|
@ -181,7 +195,7 @@ async function main() {
|
|||
headless: params.headless,
|
||||
signals: false,
|
||||
chromeOptions: {
|
||||
proxy: params.proxyServer,
|
||||
proxy: proxyServer,
|
||||
extraArgs: [
|
||||
"--window-position=0,0",
|
||||
`--window-size=${params.windowSize}`,
|
||||
|
|
|
@ -572,6 +572,18 @@ class ArgParser {
|
|||
"if specified, will write crawl.png, replay.png and diff.png for each page where they're different",
|
||||
type: "boolean",
|
||||
},
|
||||
|
||||
sshProxyPrivateKeyFile: {
|
||||
describe:
|
||||
"path to SSH private key for SOCKS5 over SSH proxy connection",
|
||||
type: "string",
|
||||
},
|
||||
|
||||
sshProxyKnownHostsFile: {
|
||||
describe:
|
||||
"path to SSH known hosts file for SOCKS5 over SSH proxy connection",
|
||||
type: "string",
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ export const LOG_CONTEXT_TYPES = [
|
|||
"links",
|
||||
"sitemap",
|
||||
"replay",
|
||||
"proxy",
|
||||
] as const;
|
||||
|
||||
export type LogContext = (typeof LOG_CONTEXT_TYPES)[number];
|
||||
|
|
|
@ -1,8 +1,17 @@
|
|||
import net from "net";
|
||||
import { Dispatcher, ProxyAgent, setGlobalDispatcher } from "undici";
|
||||
|
||||
import child_process from "child_process";
|
||||
|
||||
import { logger } from "./logger.js";
|
||||
|
||||
import { socksDispatcher } from "fetch-socks";
|
||||
import type { SocksProxyType } from "socks/typings/common/constants.js";
|
||||
|
||||
const SSH_PROXY_LOCAL_PORT = 9722;
|
||||
|
||||
const SSH_WAIT_TIMEOUT = 30000;
|
||||
|
||||
export function getEnvProxyUrl() {
|
||||
if (process.env.PROXY_SERVER) {
|
||||
return process.env.PROXY_SERVER;
|
||||
|
@ -16,10 +25,19 @@ export function getEnvProxyUrl() {
|
|||
return "";
|
||||
}
|
||||
|
||||
export function initProxy(proxy?: string): string {
|
||||
export async function initProxy(
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
params: Record<string, any>,
|
||||
detached: boolean,
|
||||
) {
|
||||
let proxy = params.proxyServer;
|
||||
|
||||
if (!proxy) {
|
||||
proxy = getEnvProxyUrl();
|
||||
}
|
||||
if (proxy && proxy.startsWith("ssh://")) {
|
||||
proxy = await runSSHD(params, detached);
|
||||
}
|
||||
if (proxy) {
|
||||
const dispatcher = createDispatcher(proxy);
|
||||
if (dispatcher) {
|
||||
|
@ -43,7 +61,7 @@ export function createDispatcher(proxyUrl: string): Dispatcher | undefined {
|
|||
proxyUrl.startsWith("socks5://") ||
|
||||
proxyUrl.startsWith("socks4://")
|
||||
) {
|
||||
// support auth as SOCKS5 auth *is* supported in Brave (though not in Chromium)
|
||||
// SOCKS5 auth *is* supported in Brave (though not in Chromium)
|
||||
const url = new URL(proxyUrl);
|
||||
const type: SocksProxyType = url.protocol === "socks4:" ? 4 : 5;
|
||||
const params = {
|
||||
|
@ -58,3 +76,137 @@ export function createDispatcher(proxyUrl: string): Dispatcher | undefined {
|
|||
return undefined;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Establish a SOCKS5 proxy over an SSH tunnel by spawning `autossh`, then wait
 * until the local SOCKS port accepts connections.
 *
 * The SSH target is taken from `params.proxyServer`, falling back to the proxy
 * URL configured via environment variables (see `getEnvProxyUrl`), so that an
 * `ssh://` URL is honored regardless of how it was supplied.
 *
 * @param params   parsed CLI params; reads `proxyServer`, `sshProxyLocalPort`,
 *                 `sshProxyPrivateKeyFile` and `sshProxyKnownHostsFile`
 * @param detached passed through as the `detached` option of `child_process.spawn`
 * @returns `""` if no `ssh://` proxy is configured, otherwise the local
 *          `socks5://localhost:<port>` URL once the tunnel is reachable.
 *          If the tunnel cannot be established within SSH_WAIT_TIMEOUT ms,
 *          `logger.fatal` is called with exit code 21 and nothing is returned.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export async function runSSHD(params: Record<string, any>, detached: boolean) {
  // initProxy() may have picked the ssh:// URL from the environment rather than
  // from params, so apply the same fallback here instead of silently returning ""
  const proxyServer: string = params.proxyServer || getEnvProxyUrl();
  if (!proxyServer || !proxyServer.startsWith("ssh://")) {
    return "";
  }

  const proxyServerUrl = new URL(proxyServer);

  // unwrap ipv6 addresses which must be wrapped in []
  const host = proxyServerUrl.hostname.replace("[", "").replace("]", "");
  const port = proxyServerUrl.port || "22";
  const user = proxyServerUrl.username || "root";
  const localPort = params.sshProxyLocalPort || SSH_PROXY_LOCAL_PORT;
  const proxyString = `socks5://localhost:${localPort}`;

  // all spawn() arguments must be strings, so convert the numeric ports explicitly
  const args: string[] = [
    user + "@" + host,
    "-p",
    String(port),
    "-D",
    String(localPort),
  ];

  // only pass an identity file if one was actually provided
  // (otherwise the literal string "undefined" would be passed to ssh)
  if (params.sshProxyPrivateKeyFile) {
    args.push("-i", params.sshProxyPrivateKeyFile);
  }

  args.push(
    "-o",
    "IdentitiesOnly=yes",
    "-o",
    "ServerAliveInterval=10", // keep ssh connection open if it becomes inactive
    "-o",
    "ExitOnForwardFailure=yes", // exit ssh when it's unable to open a socks proxy port
  );

  // the host key is only verified if a known hosts file was provided
  if (params.sshProxyKnownHostsFile) {
    args.push("-o", `UserKnownHostsFile=${params.sshProxyKnownHostsFile}`);
  } else {
    args.push("-o", "StrictHostKeyChecking=no");
  }

  // -M 0: no autossh monitoring port, -N: no remote command, -T: no tty
  args.push("-M", "0", "-N", "-T");

  logger.info("Checking SSH connection for proxy...", {}, "proxy");
  logger.debug("SSH Command: autossh " + args.join(" "), {}, "proxy");

  const proc = child_process.spawn("autossh", args, { detached });

  let procStdout = "";
  let procStderr = "";
  let spawnError = "";

  proc.stdout.on("data", (data) => {
    procStdout += data.toString();
    logger.debug("Proxy Stdout: " + data.toString(), {}, "proxy");
  });

  proc.stderr.on("data", (data) => {
    procStderr += data.toString();
    logger.debug("Proxy Stderr: " + data.toString(), {}, "proxy");
  });

  // without a listener, a failed spawn (eg. autossh not installed) would be
  // raised as an uncaught exception instead of being reported below
  proc.on("error", (err) => {
    spawnError = String(err);
    logger.debug("Proxy process error: " + spawnError, {}, "proxy");
  });

  const timeout = SSH_WAIT_TIMEOUT;
  const waitForSocksPort = new Promise((resolve, reject) => {
    const startTime = Date.now();

    function rejectOrRetry() {
      if (Date.now() - startTime >= timeout) {
        reject("Timeout reached");
      } else {
        logger.debug("Retrying connection to SSH proxy port", {}, "proxy");
        setTimeout(testPort, 500);
      }
    }

    function testPort() {
      // stop probing as soon as the process is known to be gone:
      // failed to spawn, exited (with any code), or killed by a signal
      if (spawnError || proc.exitCode !== null || proc.signalCode !== null) {
        reject(spawnError ? `Process failed: ${spawnError}` : "Process failed");
        return;
      }

      const timeRemaining = timeout - (Date.now() - startTime);
      if (timeRemaining <= 0) {
        reject("Timeout reached");
        return;
      }

      const conn = net
        .connect(localPort, "localhost")
        .on("error", () => {
          rejectOrRetry();
        })
        .on("timeout", () => {
          // socket never connected: destroy it so that no later
          // 'connect' / 'error' event triggers a second retry chain
          conn.destroy();
          rejectOrRetry();
        })
        .on("connect", () => {
          conn.end();
          resolve(true);
        });

      conn.setTimeout(timeRemaining);
    }

    testPort();
  });

  try {
    await waitForSocksPort;
  } catch (e) {
    logger.fatal(
      "Unable to establish SSH connection for proxy",
      {
        error: e,
        stdout: procStdout,
        stderr: procStderr,
        code: proc.exitCode,
      },
      "proxy",
      21,
    );
    return;
  }

  logger.info(
    `Established SSH tunnel for proxy ${proxyString} -> ${proxyServer}`,
    {},
    "proxy",
  );

  // registered only after the tunnel is up: a later exit is treated as a crash
  proc.on("exit", (code, signal) => {
    logger.warn(
      `SSH crashed, restarting`,
      {
        code,
        signal,
        stdout: procStdout,
        stderr: procStderr,
      },
      "proxy",
    );
    // don't leave the restart promise floating: surface unexpected failures
    runSSHD(params, detached).catch((e) => {
      logger.warn(
        "Unable to restart SSH proxy",
        { error: String(e) },
        "proxy",
      );
    });
  });

  return proxyString;
}
|
||||
|
|
7
tests/fixtures/proxy-key
vendored
Normal file
7
tests/fixtures/proxy-key
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
-----BEGIN OPENSSH PRIVATE KEY-----
|
||||
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAAAMwAAAAtzc2gtZW
|
||||
QyNTUxOQAAACBlI7zERGb3+ugvSkqMQytJp/XEQhsAw5c2We9HccnV0gAAAJi1AenmtQHp
|
||||
5gAAAAtzc2gtZWQyNTUxOQAAACBlI7zERGb3+ugvSkqMQytJp/XEQhsAw5c2We9HccnV0g
|
||||
AAAEB76AYPsL0SvcLL7AUKUwF9jY077ylBHaIea3sWs3b9s2UjvMREZvf66C9KSoxDK0mn
|
||||
9cRCGwDDlzZZ70dxydXSAAAADnRlc3RAbG9jYWxob3N0AQIDBAUGBw==
|
||||
-----END OPENSSH PRIVATE KEY-----
|
1
tests/fixtures/proxy-key.pub
vendored
Normal file
1
tests/fixtures/proxy-key.pub
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGUjvMREZvf66C9KSoxDK0mn9cRCGwDDlzZZ70dxydXS test@localhost
|
|
@ -7,6 +7,8 @@ const SOCKS_PORT = "1080";
|
|||
const HTTP_PORT = "3128";
|
||||
const WRONG_PORT = "33130";
|
||||
|
||||
const SSH_PROXY_IMAGE = "linuxserver/openssh-server"
|
||||
|
||||
const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";
|
||||
const HTML = "https://webrecorder.net/";
|
||||
|
||||
|
@ -14,6 +16,7 @@ const extraArgs = "--limit 1 --failOnFailedSeed --timeout 10 --logging debug";
|
|||
|
||||
let proxyAuthId;
|
||||
let proxyNoAuthId;
|
||||
let proxySSHId;
|
||||
|
||||
beforeAll(() => {
|
||||
execSync("docker network create proxy-test-net");
|
||||
|
@ -21,12 +24,15 @@ beforeAll(() => {
|
|||
proxyAuthId = execSync(`docker run -e PROXY_LOGIN=user -e PROXY_PASSWORD=passw0rd -d --rm --network=proxy-test-net --name proxy-with-auth ${PROXY_IMAGE}`, {encoding: "utf-8"});
|
||||
|
||||
proxyNoAuthId = execSync(`docker run -d --rm --network=proxy-test-net --name proxy-no-auth ${PROXY_IMAGE}`, {encoding: "utf-8"});
|
||||
|
||||
proxySSHId = execSync(`docker run -d --rm -e DOCKER_MODS=linuxserver/mods:openssh-server-ssh-tunnel -e USER_NAME=user -e PUBLIC_KEY_FILE=/keys/proxy-key.pub -v $PWD/tests/fixtures/proxy-key.pub:/keys/proxy-key.pub --network=proxy-test-net --name ssh-proxy ${SSH_PROXY_IMAGE}`);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
execSync(`docker kill -s SIGINT ${proxyAuthId}`);
|
||||
execSync(`docker kill -s SIGINT ${proxyNoAuthId}`);
|
||||
await sleep(3000);
|
||||
execSync(`docker kill -s SIGINT ${proxySSHId}`);
|
||||
await sleep(5000);
|
||||
execSync("docker network rm proxy-test-net");
|
||||
});
|
||||
|
||||
|
@ -125,3 +131,22 @@ test("http proxy set, but not running, cli arg", () => {
|
|||
});
|
||||
|
||||
|
||||
// Crawl one page through the SSH tunnel proxy, authenticating as "user" with
// the fixture private key mounted into the crawler container.
test("ssh socks proxy with custom user", () => {
  const crawlCmd = `docker run --rm --network=proxy-test-net -v $PWD/tests/fixtures/proxy-key:/keys/proxy-key webrecorder/browsertrix-crawler crawl --proxyServer ssh://user@ssh-proxy:2222 --sshProxyPrivateKeyFile /keys/proxy-key --url ${HTML} ${extraArgs}`;

  // execSync throws on a non-zero exit status, which fails the test
  execSync(crawlCmd, { encoding: "utf-8" });
});
|
||||
|
||||
|
||||
// Without a user or private key the SSH connection cannot be established,
// and the crawler is expected to exit with status 21.
test("ssh socks proxy, wrong user", () => {
  const crawlCmd = `docker run --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --proxyServer ssh://ssh-proxy:2222 --url ${HTML} ${extraArgs}`;

  let exitStatus = 0;

  try {
    execSync(crawlCmd, { encoding: "utf-8" });
  } catch (err) {
    // execSync attaches the child's exit status to the thrown error
    exitStatus = err.status;
  }

  expect(exitStatus).toBe(21);
});
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue