This commit is contained in:
Vinzenz Sinapius 2024-05-31 09:31:41 +02:00
parent eb47cb7a50
commit ec07cc99f1
5 changed files with 117 additions and 4 deletions

View file

@ -17,6 +17,7 @@
},
"dependencies": {
"@novnc/novnc": "^1.4.0",
"@types/node-fetch": "^2.6.11",
"@types/sax": "^1.2.7",
"@webrecorder/wabac": "^2.16.12",
"browsertrix-behaviors": "^0.6.0",
@ -27,12 +28,14 @@
"js-levenshtein": "^1.1.6",
"js-yaml": "^4.1.0",
"minio": "^7.1.3",
"node-fetch": "^3.3.2",
"p-queue": "^7.3.4",
"pixelmatch": "^5.3.0",
"pngjs": "^7.0.0",
"puppeteer-core": "^22.6.1",
"sax": "^1.3.0",
"sharp": "^0.32.6",
"socks-proxy-agent": "^8.0.3",
"tsc": "^2.0.4",
"uuid": "8.3.2",
"warcio": "^2.2.1",

View file

@ -115,7 +115,7 @@ export class Browser {
? undefined
: (target) => this.targetFilter(target),
};
logger.info("Launching browser", launchOpts);
await this._init(launchOpts, ondisconnect, recording);
}
@ -237,6 +237,7 @@ export class Browser {
];
proxy = proxy || this.getProxy();
logger.info(`Proxy settings: ${proxy}`);
if (proxy) {
args.push("--ignore-certificate-errors");

View file

@ -8,6 +8,13 @@ import { logger, formatErr } from "./logger.js";
import { sleep, timedRun, timestampNow } from "./timing.js";
import { RequestResponseInfo, isHTMLContentType } from "./reqresp.js";
import { SocksProxyAgent } from "socks-proxy-agent";
import fetch, { Response } from "node-fetch";
import { default as stream } from "node:stream";
import type { ReadableStream } from "node:stream/web";
// @ts-expect-error TODO fill in why error is expected
import { baseRules as baseDSRules } from "@webrecorder/wabac/src/rewrite/index.js";
import {
@ -1165,6 +1172,8 @@ class AsyncFetcher {
manualRedirect = false;
socksAgent: SocksProxyAgent | null = null;
constructor({
tempdir,
reqresp,
@ -1195,6 +1204,10 @@ class AsyncFetcher {
this.maxFetchSize = maxFetchSize;
this.manualRedirect = manualRedirect;
if (process.env.PROXY_SERVER) {
this.socksAgent = new SocksProxyAgent(process.env.PROXY_SERVER);
}
}
async load() {
@ -1361,6 +1374,7 @@ class AsyncFetcher {
body: reqresp.postData || undefined,
signal,
redirect: this.manualRedirect ? "manual" : "follow",
agent: this.socksAgent || undefined,
});
if (this.filter && !this.filter(resp) && abort) {
@ -1386,10 +1400,14 @@ class AsyncFetcher {
reqresp.fillFetchResponse(resp);
return this.takeReader(resp.body.getReader());
const reader = stream.Readable.fromWeb(
resp.body as unknown as ReadableStream<Uint8Array>,
);
return this.takeReader(reader);
}
async *takeReader(reader: ReadableStreamDefaultReader<Uint8Array>) {
async *takeReader(reader: stream.Readable) {
let size = 0;
try {
while (true) {

View file

@ -5,6 +5,8 @@ import { Protocol } from "puppeteer-core";
import { postToGetUrl } from "warcio";
import { HTML_TYPES } from "./constants.js";
import { Response } from "node-fetch";
const CONTENT_LENGTH = "content-length";
const CONTENT_TYPE = "content-type";
const EXCLUDE_HEADERS = ["content-encoding", "transfer-encoding"];

View file

@ -1136,6 +1136,14 @@
resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841"
integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==
"@types/node-fetch@^2.6.11":
version "2.6.11"
resolved "https://registry.yarnpkg.com/@types/node-fetch/-/node-fetch-2.6.11.tgz#9b39b78665dae0e82a08f02f4967d62c66f95d24"
integrity sha512-24xFj9R5+rfQJLRyM56qh+wnVSYhyXC2tkoBndtY0U+vubqNsYXGjufB2nn8Q6gt0LrARwL6UBtMCSVCwl4B1g==
dependencies:
"@types/node" "*"
form-data "^4.0.0"
"@types/node@*":
version "15.3.0"
resolved "https://registry.yarnpkg.com/@types/node/-/node-15.3.0.tgz#d6fed7d6bc6854306da3dea1af9f874b00783e26"
@ -1363,6 +1371,13 @@ agent-base@^7.0.2, agent-base@^7.1.0:
dependencies:
debug "^4.3.4"
agent-base@^7.1.1:
version "7.1.1"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.1.tgz#bdbded7dfb096b751a2a087eeeb9664725b2e317"
integrity sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==
dependencies:
debug "^4.3.4"
ajv@^6.12.4:
version "6.12.6"
resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4"
@ -1476,6 +1491,11 @@ async@^3.2.4:
resolved "https://registry.yarnpkg.com/async/-/async-3.2.4.tgz#2d22e00f8cddeb5fde5dd33522b56d1cf569a81c"
integrity sha512-iAB+JbDEGXhyIUavoDl9WP/Jj106Kz9DEn1DPgYw5ruDn0e3Wgi3sKFm55sASdGBNOQB8F59d9qQ7deqrHA8wQ==
asynckit@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
auto-js-ipfs@^2.1.1:
version "2.3.0"
resolved "https://registry.yarnpkg.com/auto-js-ipfs/-/auto-js-ipfs-2.3.0.tgz#2c2684074cdaa2eb579345c4f86420d7635956c8"
@ -1849,6 +1869,13 @@ color@^4.2.3:
color-convert "^2.0.1"
color-string "^1.9.0"
combined-stream@^1.0.8:
version "1.0.8"
resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
integrity sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==
dependencies:
delayed-stream "~1.0.0"
concat-map@0.0.1:
version "0.0.1"
resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b"
@ -1905,6 +1932,11 @@ crypto-random-string@^4.0.0:
dependencies:
type-fest "^1.0.1"
data-uri-to-buffer@^4.0.0:
version "4.0.1"
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz#d8feb2b2881e6a4f58c2e08acfd0e2834e26222e"
integrity sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==
data-uri-to-buffer@^5.0.1:
version "5.0.1"
resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-5.0.1.tgz#db89a9e279c2ffe74f50637a59a32fb23b3e4d7c"
@ -1972,6 +2004,11 @@ degenerator@^5.0.0:
escodegen "^2.1.0"
esprima "^4.0.1"
delayed-stream@~1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
denque@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/denque/-/denque-2.1.0.tgz#e93e1a6569fb5e66f16a3c2a2964617d349d6ab1"
@ -2386,6 +2423,14 @@ fd-slicer@~1.1.0:
dependencies:
pend "~1.2.0"
fetch-blob@^3.1.2, fetch-blob@^3.1.4:
version "3.2.0"
resolved "https://registry.yarnpkg.com/fetch-blob/-/fetch-blob-3.2.0.tgz#f09b8d4bbd45adc6f0c20b7e787e793e309dcce9"
integrity sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==
dependencies:
node-domexception "^1.0.0"
web-streams-polyfill "^3.0.3"
file-entry-cache@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"
@ -2439,6 +2484,22 @@ foreach@^2.0.5:
resolved "https://registry.yarnpkg.com/foreach/-/foreach-2.0.5.tgz#0bee005018aeb260d0a3af3ae658dd0136ec1b99"
integrity sha1-C+4AUBiusmDQo6865ljdATbsG5k=
form-data@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/form-data/-/form-data-4.0.0.tgz#93919daeaf361ee529584b9b31664dc12c9fa452"
integrity sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==
dependencies:
asynckit "^0.4.0"
combined-stream "^1.0.8"
mime-types "^2.1.12"
formdata-polyfill@^4.0.10:
version "4.0.10"
resolved "https://registry.yarnpkg.com/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz#24807c31c9d402e002ab3d8c720144ceb8848423"
integrity sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==
dependencies:
fetch-blob "^3.1.2"
fs-constants@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/fs-constants/-/fs-constants-1.0.0.tgz#6be0de9be998ce16af8afc24497b9ee9b7ccd9ad"
@ -3606,7 +3667,7 @@ mime-db@1.52.0:
resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.52.0.tgz#bbabcdc02859f4987301c856e3387ce5ec43bf70"
integrity sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==
mime-types@^2.1.35:
mime-types@^2.1.12, mime-types@^2.1.35:
version "2.1.35"
resolved "https://registry.yarnpkg.com/mime-types/-/mime-types-2.1.35.tgz#381a871b62a734450660ae3deee44813f70d959a"
integrity sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==
@ -3704,6 +3765,20 @@ node-addon-api@^6.1.0:
resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-6.1.0.tgz#ac8470034e58e67d0c6f1204a18ae6995d9c0d76"
integrity sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==
node-domexception@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/node-domexception/-/node-domexception-1.0.0.tgz#6888db46a1f71c0b76b3f7555016b63fe64766e5"
integrity sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==
node-fetch@^3.3.2:
version "3.3.2"
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-3.3.2.tgz#d1e889bacdf733b4ff3b2b243eb7a12866a0b78b"
integrity sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==
dependencies:
data-uri-to-buffer "^4.0.0"
fetch-blob "^3.1.4"
formdata-polyfill "^4.0.10"
node-int64@^0.4.0:
version "0.4.0"
resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b"
@ -4429,6 +4504,15 @@ socks-proxy-agent@^8.0.2:
debug "^4.3.4"
socks "^2.7.1"
socks-proxy-agent@^8.0.3:
version "8.0.3"
resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz#6b2da3d77364fde6292e810b496cb70440b9b89d"
integrity sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==
dependencies:
agent-base "^7.1.1"
debug "^4.3.4"
socks "^2.7.1"
socks@^2.7.1:
version "2.7.1"
resolved "https://registry.yarnpkg.com/socks/-/socks-2.7.1.tgz#d8e651247178fde79c0663043e07240196857d55"
@ -4958,6 +5042,11 @@ web-encoding@^1.1.5:
optionalDependencies:
"@zxing/text-encoding" "0.9.0"
web-streams-polyfill@^3.0.3:
version "3.3.3"
resolved "https://registry.yarnpkg.com/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz#2073b91a2fdb1fbfbd401e7de0ac9f8214cecb4b"
integrity sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==
which-boxed-primitive@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6"