switch to undici proxy

This commit is contained in:
Ilya Kreymer 2024-06-05 14:52:09 -07:00
parent ec07cc99f1
commit ae146b182e
5 changed files with 90 additions and 123 deletions

View file

@ -17,26 +17,25 @@
},
"dependencies": {
"@novnc/novnc": "^1.4.0",
"@types/node-fetch": "^2.6.11",
"@types/sax": "^1.2.7",
"@webrecorder/wabac": "^2.16.12",
"browsertrix-behaviors": "^0.6.0",
"crc": "^4.3.2",
"fetch-socks": "^1.3.0",
"get-folder-size": "^4.0.0",
"husky": "^8.0.3",
"ioredis": "^5.3.2",
"js-levenshtein": "^1.1.6",
"js-yaml": "^4.1.0",
"minio": "^7.1.3",
"node-fetch": "^3.3.2",
"p-queue": "^7.3.4",
"pixelmatch": "^5.3.0",
"pngjs": "^7.0.0",
"puppeteer-core": "^22.6.1",
"sax": "^1.3.0",
"sharp": "^0.32.6",
"socks-proxy-agent": "^8.0.3",
"tsc": "^2.0.4",
"undici": "^6.18.2",
"uuid": "8.3.2",
"warcio": "^2.2.1",
"ws": "^7.4.4",

View file

@ -236,7 +236,7 @@ export class Browser {
...extraArgs,
];
proxy = proxy || this.getProxy();
proxy = proxy || getProxy();
logger.info(`Proxy settings: ${proxy}`);
if (proxy) {
@ -268,19 +268,6 @@ export class Browser {
return `Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${version} Safari/537.36`;
}
getProxy() {
if (process.env.PROXY_SERVER) {
return process.env.PROXY_SERVER;
}
// for backwards compatibility with 0.x proxy settings
if (process.env.PROXY_HOST && process.env.PROXY_PORT) {
return `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`;
}
return "";
}
getBrowserExe() {
const files = [
process.env.BROWSER_BIN,
@ -614,3 +601,16 @@ export const defaultArgs = [
"--export-tagged-pdf",
"--apps-keep-chrome-alive-in-tests",
];
export function getProxy() {
if (process.env.PROXY_SERVER) {
return process.env.PROXY_SERVER;
}
// for backwards compatibility with 0.x proxy settings
if (process.env.PROXY_HOST && process.env.PROXY_PORT) {
return `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`;
}
return "";
}

View file

@ -8,12 +8,12 @@ import { logger, formatErr } from "./logger.js";
import { sleep, timedRun, timestampNow } from "./timing.js";
import { RequestResponseInfo, isHTMLContentType } from "./reqresp.js";
import { SocksProxyAgent } from "socks-proxy-agent";
import { fetch, Response, Dispatcher, ProxyAgent } from "undici";
import fetch, { Response } from "node-fetch";
import { socksDispatcher } from "fetch-socks";
import type { SocksProxyType } from "socks/typings/common/constants.js";
import { default as stream } from "node:stream";
import type { ReadableStream } from "node:stream/web";
import { getProxy } from "./browser.js";
// @ts-expect-error TODO fill in why error is expected
import { baseRules as baseDSRules } from "@webrecorder/wabac/src/rewrite/index.js";
@ -138,6 +138,8 @@ export class Recorder {
frameIdToExecId: Map<string, number> | null;
dispatcher?: Dispatcher;
constructor({
workerid,
writer,
@ -160,6 +162,8 @@ export class Recorder {
this.fetcherQ = new PQueue({ concurrency: 1 });
this.frameIdToExecId = null;
this.dispatcher = createDispatcher();
}
async onCreatePage({
@ -1172,8 +1176,6 @@ class AsyncFetcher {
manualRedirect = false;
socksAgent: SocksProxyAgent | null = null;
constructor({
tempdir,
reqresp,
@ -1204,10 +1206,6 @@ class AsyncFetcher {
this.maxFetchSize = maxFetchSize;
this.manualRedirect = manualRedirect;
if (process.env.PROXY_SERVER) {
this.socksAgent = new SocksProxyAgent(process.env.PROXY_SERVER);
}
}
async load() {
@ -1374,7 +1372,7 @@ class AsyncFetcher {
body: reqresp.postData || undefined,
signal,
redirect: this.manualRedirect ? "manual" : "follow",
agent: this.socksAgent || undefined,
dispatcher: this.recorder.dispatcher,
});
if (this.filter && !this.filter(resp) && abort) {
@ -1400,14 +1398,10 @@ class AsyncFetcher {
reqresp.fillFetchResponse(resp);
const reader = stream.Readable.fromWeb(
resp.body as unknown as ReadableStream<Uint8Array>,
);
return this.takeReader(reader);
return this.takeReader(resp.body.getReader());
}
async *takeReader(reader: stream.Readable) {
async *takeReader(reader: ReadableStreamDefaultReader<Uint8Array>) {
let size = 0;
try {
while (true) {
@ -1655,3 +1649,27 @@ function createRequest(
requestBody,
);
}
function createDispatcher(): Dispatcher | undefined {
const proxyUrl = getProxy();
if (proxyUrl.startsWith("http://") || proxyUrl.startsWith("https://")) {
return new ProxyAgent({ uri: proxyUrl });
} else if (
proxyUrl.startsWith("socks://") ||
proxyUrl.startsWith("socks5://") ||
proxyUrl.startsWith("socks4://")
) {
const url = new URL(proxyUrl);
const type: SocksProxyType = url.protocol === "socks4:" ? 4 : 5;
const params = {
type,
host: url.host,
port: parseInt(url.port),
userId: url.username || undefined,
password: url.password || undefined,
};
return socksDispatcher(params);
} else {
return undefined;
}
}

View file

@ -5,7 +5,7 @@ import { Protocol } from "puppeteer-core";
import { postToGetUrl } from "warcio";
import { HTML_TYPES } from "./constants.js";
import { Response } from "node-fetch";
import { Response } from "undici";
const CONTENT_LENGTH = "content-length";
const CONTENT_TYPE = "content-type";

126
yarn.lock
View file

@ -1136,14 +1136,6 @@
resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"
integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==
"@types/node-fetch@^2.6.11":
version "2.6.11"
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24"
integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==
dependencies:
"@types/node" "*"
form-data "^4.0.0"
"@types/node@*":
version "15.3.0"
resolved "https://registry.yarnpkg.com/@types/node/-/node-15.3.0.tgz#d6fed7d6bc6854306da3dea1af9f874b00783e26"
@ -1371,13 +1363,6 @@ agent-base@^7.0.2, agent-base@^7.1.0:
dependencies:
debug "^4.3.4"
agent-base@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.1.tgz#bdbded7dfb096b751a2a087eeeb9664725b2e317"
integrity sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==
dependencies:
debug "^4.3.4"
ajv@^6.12.4:
version "6.12.6"
resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
@ -1491,11 +1476,6 @@ async@^3.2.4:
resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c"
integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==
asynckit@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
auto-js-ipfs@^2.1.1:
version "2.3.0"
resolved "https://registry.yarnpkg.com/auto-js-ipfs/-/auto-js-ipfs-2.3.0.tgz#2c2684074cdaa2eb579345c4f86420d7635956c8"
@ -1869,13 +1849,6 @@ color@^4.2.3:
color-convert "^2.0.1"
color-string "^1.9.0"
combined-stream@^1.0.8:
version "1.0.8"
resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
dependencies:
delayed-stream "~1.0.0"
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
@ -1932,11 +1905,6 @@ crypto-random-string@^4.0.0:
dependencies:
type-fest "^1.0.1"
data-uri-to-buffer@^4.0.0:
version "4.0.1"
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e"
integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==
data-uri-to-buffer@^5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz#db89a9e279c2ffe74f50637a59a32fb23b3e4d7c"
@ -2004,11 +1972,6 @@ degenerator@^5.0.0:
escodegen "^2.1.0"
esprima "^4.0.1"
delayed-stream@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
denque@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/denque/-/denque-2.1.0.tgz#e93e1a6569fb5e66f16a3c2a2964617d349d6ab1"
@ -2423,13 +2386,13 @@ fd-slicer@~1.1.0:
dependencies:
pend "~1.2.0"
fetch-blob@^3.1.2, fetch-blob@^3.1.4:
version "3.2.0"
resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9"
integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==
fetch-socks@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/fetch-socks/-/fetch-socks-1.3.0.tgz#1f07b26924b5e7370aa23fd6e9332a5863736d1b"
integrity sha512-Cq7O53hoNiVeOs6u54f8M/H/w2yzhmnTQ3tcAJj9FNKYOeNGmt8qNU1zpWOzJD09f0uqfmBXxLbzWPsnT6GcRw==
dependencies:
node-domexception "^1.0.0"
web-streams-polyfill "^3.0.3"
socks "^2.8.1"
undici "^6.10.1"
file-entry-cache@^6.0.1:
version "6.0.1"
@ -2484,22 +2447,6 @@ foreach@^2.0.5:
resolved "https://registry.yarnpkg.com/foreach/-/foreach-2.0.5.tgz#0bee005018aeb260d0a3af3ae658dd0136ec1b99"
integrity sha1-C+4AUBiusmDQo6865ljdATbsG5k=
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
dependencies:
asynckit "^0.4.0"
combined-stream "^1.0.8"
mime-types "^2.1.12"
formdata-polyfill@^4.0.10:
version "4.0.10"
resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423"
integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==
dependencies:
fetch-blob "^3.1.2"
fs-constants@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad"
@ -2839,6 +2786,14 @@ ioredis@^5.3.2:
redis-parser "^3.0.0"
standard-as-callback "^2.1.0"
ip-address@^9.0.5:
version "9.0.5"
resolved "https://registry.yarnpkg.com/ip-address/-/ip-address-9.0.5.tgz#117a960819b08780c3bd1f14ef3c1cc1d3f3ea5a"
integrity sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==
dependencies:
jsbn "1.1.0"
sprintf-js "^1.1.3"
ip@^1.1.8:
version "1.1.8"
resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.8.tgz#ae05948f6b075435ed3307acce04629da8cdbf48"
@ -3488,6 +3443,11 @@ js-yaml@^4.1.0:
dependencies:
argparse "^2.0.1"
jsbn@1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-1.1.0.tgz#b01307cb29b618a1ed26ec79e911f803c4da0040"
integrity sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==
jsesc@^2.5.1:
version "2.5.2"
resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4"
@ -3667,7 +3627,7 @@ mime-db@1.52.0:
resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70"
integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==
mime-types@^2.1.12, mime-types@^2.1.35:
mime-types@^2.1.35:
version "2.1.35"
resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a"
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==
@ -3765,20 +3725,6 @@ node-addon-api@^6.1.0:
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-6.1.0.tgz#ac8470034e58e67d0c6f1204a18ae6995d9c0d76"
integrity sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==
node-domexception@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==
node-fetch@^3.3.2:
version "3.3.2"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b"
integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==
dependencies:
data-uri-to-buffer "^4.0.0"
fetch-blob "^3.1.4"
formdata-polyfill "^4.0.10"
node-int64@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
@ -4504,15 +4450,6 @@ socks-proxy-agent@^8.0.2:
debug "^4.3.4"
socks "^2.7.1"
socks-proxy-agent@^8.0.3:
version "8.0.3"
resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz#6b2da3d77364fde6292e810b496cb70440b9b89d"
integrity sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==
dependencies:
agent-base "^7.1.1"
debug "^4.3.4"
socks "^2.7.1"
socks@^2.7.1:
version "2.7.1"
resolved "https://registry.yarnpkg.com/socks/-/socks-2.7.1.tgz#d8e651247178fde79c0663043e07240196857d55"
@ -4521,6 +4458,14 @@ socks@^2.7.1:
ip "^2.0.0"
smart-buffer "^4.2.0"
socks@^2.8.1:
version "2.8.3"
resolved "https://registry.yarnpkg.com/socks/-/socks-2.8.3.tgz#1ebd0f09c52ba95a09750afe3f3f9f724a800cb5"
integrity sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==
dependencies:
ip-address "^9.0.5"
smart-buffer "^4.2.0"
source-map-support@0.5.13:
version "0.5.13"
resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.13.tgz#31b24a9c2e73c2de85066c0feb7d44767ed52932"
@ -4539,6 +4484,11 @@ split-on-first@^1.0.0:
resolved "https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f"
integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw==
sprintf-js@^1.1.3:
version "1.1.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.3.tgz#4914b903a2f8b685d17fdf78a70e917e872e444a"
integrity sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==
sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
@ -4926,6 +4876,11 @@ undici-types@~5.25.1:
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.25.3.tgz#e044115914c85f0bcbb229f346ab739f064998c3"
integrity sha512-Ga1jfYwRn7+cP9v8auvEXN1rX3sWqlayd4HP7OKk4mZWylEmu3KzXDUGrQUN6Ol7qo1gPvB2e5gX6udnyEPgdA==
undici@^6.10.1, undici@^6.18.2:
version "6.18.2"
resolved "https://registry.yarnpkg.com/undici/-/undici-6.18.2.tgz#f662a5dc33cf654fc412a9912e5a07b138d75c97"
integrity sha512-o/MQLTwRm9IVhOqhZ0NQ9oXax1ygPjw6Vs+Vq/4QRjbOAC3B1GCHy7TYxxbExKlb7bzDRzt9vBWU6BDz0RFfYg==
unique-string@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/unique-string/-/unique-string-3.0.0.tgz#84a1c377aff5fd7a8bc6b55d8244b2bd90d75b9a"
@ -5042,11 +4997,6 @@ web-encoding@^1.1.5:
optionalDependencies:
"@zxing/text-encoding" "0.9.0"
web-streams-polyfill@^3.0.3:
version "3.3.3"
resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz#2073b91a2fdb1fbfbd401e7de0ac9f8214cecb4b"
integrity sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==
which-boxed-primitive@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6"