Merge branch 'main' into release/1.1.4

bump to 1.2.0-beta.1
Ilya Kreymer 2024-06-13 19:28:25 -07:00
commit f504effa51
25 changed files with 564 additions and 169 deletions

@@ -0,0 +1,26 @@
name: Generate Draft Release
on:
push:
branches:
- main
- "*-release"
jobs:
package_chart:
runs-on: ubuntu-latest
steps:
- name: Check out Git repository
uses: actions/checkout@v3
- name: Get Version
run: |
echo "version=$(jq -r .version package.json)" >> "$GITHUB_ENV"
- name: Make Draft Release
uses: softprops/action-gh-release@v1
with:
name: "Browsertrix Crawler v${{ env.version }}"
tag_name: v${{ env.version }}
draft: true

@@ -1,4 +1,4 @@
ARG BROWSER_VERSION=1.64.109
ARG BROWSER_VERSION=1.66.115
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:brave-${BROWSER_VERSION}
FROM ${BROWSER_IMAGE_BASE}
@@ -6,11 +6,7 @@ FROM ${BROWSER_IMAGE_BASE}
# needed to add args to main build stage
ARG BROWSER_VERSION
ENV PROXY_HOST=localhost \
PROXY_PORT=8080 \
PROXY_CA_URL=http://wsgiprox/download/pem \
PROXY_CA_FILE=/tmp/proxy-ca.pem \
DISPLAY=:99 \
ENV DISPLAY=:99 \
GEOMETRY=1360x1020x16 \
BROWSER_VERSION=${BROWSER_VERSION} \
BROWSER_BIN=google-chrome \
@@ -28,9 +24,6 @@ ADD package.json /app/
# to allow forcing rebuilds from this stage
ARG REBUILD
# Prefetch tldextract so pywb is able to boot in environments with limited internet access
RUN tldextract --update
# Download and format ad host blocklist as JSON
RUN mkdir -p /tmp/ads && cd /tmp/ads && \
curl -vs -o ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
@@ -64,8 +57,11 @@ WORKDIR /crawls
# enable to test custom behaviors build (from browsertrix-behaviors)
# COPY behaviors.js /app/node_modules/browsertrix-behaviors/dist/behaviors.js
# add brave/chromium group policies
RUN mkdir -p /etc/brave/policies/managed/
ADD config/policies /etc/brave/policies/managed/
ADD docker-entrypoint.sh /docker-entrypoint.sh
ENTRYPOINT ["/docker-entrypoint.sh"]
CMD ["crawl"]

@@ -0,0 +1,6 @@
{
"BraveRewardsDisabled": true,
"BraveWalletDisabled": true,
"BraveVPNDisabled": 1,
"BraveAIChatEnabled": false
}

@@ -0,0 +1,8 @@
{
"IncognitoModeAvailability": 1,
"TorDisabled": true,
"AllowFileSelectionDialogs": false,
"URLBlocklist": [
"file://*"
]
}

@@ -144,6 +144,11 @@ Options:
age behavior will run on each page.
If 0, a behavior can run until finis
h. [number] [default: 90]
--postLoadDelay If >0, amount of time to sleep (in s
econds) after page has loaded, befor
e taking screenshots / getting text
/ running behaviors
[number] [default: 0]
--pageExtraDelay, --delay If >0, amount of time to sleep (in s
econds) after behaviors before movin
g on to next page
@@ -227,16 +232,19 @@ Options:
--writePagesToRedis If set, write page objects to redis
[boolean] [default: false]
--failOnFailedSeed If set, crawler will fail with exit
code 1 if any seed fails
[boolean] [default: false]
code 1 if any seed fails. When combi
ned with --failOnInvalidStatus, will
result in crawl failing with exit co
de 1 if any seed has a 4xx/5xx respo
nse [boolean] [default: false]
--failOnFailedLimit If set, save state and exit if numbe
r of failed pages exceeds this value
[number] [default: 0]
--failOnInvalidStatus If set, will treat pages with non-20
0 response as failures. When combine
d with --failOnFailedLimit or --fail
OnFailedSeedmay result in crawl fail
ing due to non-200 responses
--failOnInvalidStatus If set, will treat pages with 4xx or
5xx response as failures. When comb
ined with --failOnFailedLimit or --f
ailOnFailedSeed may result in crawl
failing due to non-200 responses
[boolean] [default: false]
--customBehaviors injects a custom behavior file or se
t of behavior files in a directory
@@ -250,6 +258,8 @@ Options:
nabled, or disabled with custom prof
ile
[choices: "disabled", "disabled-if-profile", "enabled"] [default: "disabled"]
--dryRun If true, no data is written to disk,
only logs [boolean]
--qaSource Required for QA mode. Source (WACZ o
r multi WACZ) for QA [string]
--qaDebugImageDiff if specified, will write crawl.png,
@@ -269,7 +279,8 @@ Options:
ted
--password The password for the login. If not specified, will be promp
ted (recommended)
--filename The filename for the profile tarball
--filename The filename for the profile tarball, stored within /crawls
/profiles if absolute path not provided
[default: "/crawls/profiles/profile.tar.gz"]
--debugScreenshot If specified, take a screenshot after login and save as thi
s filename
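For a concrete sense of the new flags, a hypothetical invocation in the style of the repo's test suite (URL, flags combination, and limit are illustrative, not from this commit):

import child_process from "child_process";

// Sketch: sleep 5 seconds after each page load before screenshots/text/behaviors,
// and exit with code 1 if the seed returns a 4xx/5xx response.
child_process.execSync(
  "docker run --rm webrecorder/browsertrix-crawler crawl " +
    "--url https://example.com/ --postLoadDelay 5 " +
    "--failOnFailedSeed --failOnInvalidStatus --limit 1",
);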

@@ -1,6 +1,6 @@
{
"name": "browsertrix-crawler",
"version": "1.1.4",
"version": "1.2.0-beta.1",
"main": "browsertrix-crawler",
"type": "module",
"repository": "https://github.com/webrecorder/browsertrix-crawler",
@@ -21,6 +21,7 @@
"@webrecorder/wabac": "^2.16.12",
"browsertrix-behaviors": "^0.6.0",
"crc": "^4.3.2",
"fetch-socks": "^1.3.0",
"get-folder-size": "^4.0.0",
"husky": "^8.0.3",
"ioredis": "^5.3.2",

@@ -16,8 +16,6 @@ import { parseArgs } from "./util/argParser.js";
import yaml from "js-yaml";
import * as warcio from "warcio";
import { HealthChecker } from "./util/healthcheck.js";
import { TextExtractViaSnapshot } from "./util/textextract.js";
import {
@@ -46,27 +44,19 @@ import { Browser } from "./util/browser.js";
import {
ADD_LINK_FUNC,
BEHAVIOR_LOG_FUNC,
HTML_TYPES,
DEFAULT_SELECTORS,
} from "./util/constants.js";
import { AdBlockRules, BlockRules } from "./util/blockrules.js";
import { OriginOverride } from "./util/originoverride.js";
// to ignore HTTPS error for HEAD check
import { Agent as HTTPAgent } from "http";
import { Agent as HTTPSAgent } from "https";
import { CDPSession, Frame, HTTPRequest, Page, Protocol } from "puppeteer-core";
import { Recorder } from "./util/recorder.js";
import { SitemapReader } from "./util/sitemapper.js";
import { ScopedSeed } from "./util/seeds.js";
import { WARCWriter } from "./util/warcwriter.js";
const HTTPS_AGENT = new HTTPSAgent({
rejectUnauthorized: false,
});
const HTTP_AGENT = new HTTPAgent();
import { WARCWriter, createWARCInfo, setWARCInfo } from "./util/warcwriter.js";
import { isHTMLContentType } from "./util/reqresp.js";
import { initProxy } from "./util/proxy.js";
const behaviors = fs.readFileSync(
new URL(
@@ -184,6 +174,8 @@ export class Crawler {
maxHeapUsed = 0;
maxHeapTotal = 0;
proxyServer?: string;
driver!: (opts: {
page: Page;
data: PageState;
@@ -191,7 +183,7 @@
crawler: Crawler;
}) => NonNullable<unknown>;
recording = true;
recording: boolean;
constructor() {
const args = this.parseArgs();
@@ -225,6 +217,13 @@
logger.debug("Writing log to: " + this.logFilename, {}, "general");
this.recording = !this.params.dryRun;
if (this.params.dryRun) {
logger.warn(
"Dry run mode: no archived data stored, only pages and logging. Storage and archive creation related options will be ignored.",
);
}
this.headers = {};
// pages file
@@ -449,17 +448,23 @@
async bootstrap() {
const subprocesses: ChildProcess[] = [];
this.proxyServer = initProxy(this.params.proxyServer);
subprocesses.push(this.launchRedis());
await fsp.mkdir(this.logDir, { recursive: true });
if (!this.params.dryRun) {
await fsp.mkdir(this.archivesDir, { recursive: true });
await fsp.mkdir(this.tempdir, { recursive: true });
await fsp.mkdir(this.tempCdxDir, { recursive: true });
}
this.logFH = fs.createWriteStream(this.logFilename, { flags: "a" });
logger.setExternalLogStream(this.logFH);
this.infoString = await getInfoString();
setWARCInfo(this.infoString, this.params.warcInfo);
logger.info(this.infoString);
logger.info("Seeds", this.seeds);
@@ -515,10 +520,10 @@
);
}
if (this.params.screenshot) {
if (this.params.screenshot && !this.params.dryRun) {
this.screenshotWriter = this.createExtraResourceWarcWriter("screenshots");
}
if (this.params.text) {
if (this.params.text && !this.params.dryRun) {
this.textWriter = this.createExtraResourceWarcWriter("text");
}
}
@@ -788,7 +793,7 @@ self.__bx_behaviors.selectMainBehavior();
async crawlPage(opts: WorkerState): Promise<void> {
await this.writeStats();
const { page, data, workerid, callbacks, directFetchCapture } = opts;
const { page, cdp, data, workerid, callbacks, directFetchCapture } = opts;
data.callbacks = callbacks;
const { url } = data;
@@ -797,35 +802,27 @@ self.__bx_behaviors.selectMainBehavior();
data.logDetails = logDetails;
data.workerid = workerid;
data.isHTMLPage = await timedRun(
this.isHTML(url, logDetails),
FETCH_TIMEOUT_SECS,
"HEAD request to determine if URL is HTML page timed out",
logDetails,
"fetch",
true,
);
if (!data.isHTMLPage && directFetchCapture) {
if (directFetchCapture) {
try {
const { fetched, mime } = await timedRun(
directFetchCapture(url),
const { fetched, mime, ts } = await timedRun(
directFetchCapture({ url, headers: this.headers, cdp }),
FETCH_TIMEOUT_SECS,
"Direct fetch capture attempt timed out",
logDetails,
"fetch",
true,
);
if (fetched) {
data.loadState = LoadState.FULL_PAGE_LOADED;
if (mime) {
data.mime = mime;
data.isHTMLPage = isHTMLContentType(mime);
}
if (fetched) {
data.loadState = LoadState.FULL_PAGE_LOADED;
data.status = 200;
data.ts = new Date();
data.ts = ts || new Date();
logger.info(
"Direct fetch successful",
{ url, ...logDetails },
{ url, mime, ...logDetails },
"fetch",
);
return;
@@ -1105,30 +1102,10 @@ self.__bx_behaviors.selectMainBehavior();
return res ? frame : null;
}
async createWARCInfo(filename: string) {
const warcVersion = "WARC/1.1";
const type = "warcinfo";
const info = {
software: this.infoString,
format: "WARC File Format 1.1",
};
const warcInfo = { ...info, ...this.params.warcInfo };
const record = await warcio.WARCRecord.createWARCInfo(
{ filename, type, warcVersion },
warcInfo,
);
const buffer = await warcio.WARCSerializer.serialize(record, {
gzip: true,
});
return buffer;
}
async checkLimits() {
let interrupt = false;
const size = await getDirSize(this.archivesDir);
const size = this.params.dryRun ? 0 : await getDirSize(this.archivesDir);
await this.crawlState.setArchiveSize(size);
@@ -1153,7 +1130,11 @@ self.__bx_behaviors.selectMainBehavior();
if (this.params.diskUtilization) {
// Check that disk usage isn't already or soon to be above threshold
const diskUtil = await checkDiskUtilization(this.params, size);
const diskUtil = await checkDiskUtilization(
this.collDir,
this.params,
size,
);
if (diskUtil.stop === true) {
interrupt = true;
}
@@ -1328,7 +1309,7 @@ self.__bx_behaviors.selectMainBehavior();
emulateDevice: this.emulateDevice,
swOpt: this.params.serviceWorker,
chromeOptions: {
proxy: false,
proxy: this.proxyServer,
userAgent: this.emulateDevice.userAgent,
extraArgs: this.extraChromeArgs(),
},
@@ -1424,11 +1405,11 @@ self.__bx_behaviors.selectMainBehavior();
}
async postCrawl() {
if (this.params.combineWARC) {
if (this.params.combineWARC && !this.params.dryRun) {
await this.combineWARC();
}
if (this.params.generateCDX) {
if (this.params.generateCDX && !this.params.dryRun) {
logger.info("Generating CDX");
await fsp.mkdir(path.join(this.collDir, "indexes"), { recursive: true });
await this.crawlState.setStatus("generate-cdx");
@@ -1460,6 +1441,7 @@ self.__bx_behaviors.selectMainBehavior();
if (
this.params.generateWACZ &&
!this.params.dryRun &&
(!this.interrupted || this.finalExit || this.uploadAndDeleteLocal)
) {
const uploaded = await this.generateWACZ();
@@ -1775,7 +1757,7 @@ self.__bx_behaviors.selectMainBehavior();
const contentType = resp.headers()["content-type"];
isHTMLPage = this.isHTMLContentType(contentType);
isHTMLPage = isHTMLContentType(contentType);
if (contentType) {
data.mime = contentType.split(";")[0];
@@ -1923,7 +1905,9 @@ self.__bx_behaviors.selectMainBehavior();
"behavior",
);
try {
await frame.evaluate("self.__bx_behaviors.awaitPageLoad();");
await frame.evaluate(
"self.__bx_behaviors && self.__bx_behaviors.awaitPageLoad();",
);
} catch (e) {
logger.warn("Waiting for custom page load failed", e, "behavior");
}
@@ -2186,12 +2170,14 @@ self.__bx_behaviors.selectMainBehavior();
let { ts } = state;
if (!ts) {
ts = new Date();
if (!this.params.dryRun) {
logger.warn(
"Page date missing, setting to now",
{ url, ts },
"pageStatus",
);
}
}
row.ts = ts.toISOString();
@@ -2241,49 +2227,6 @@ self.__bx_behaviors.selectMainBehavior();
}
}
resolveAgent(urlParsed: URL) {
return urlParsed.protocol === "https:" ? HTTPS_AGENT : HTTP_AGENT;
}
async isHTML(url: string, logDetails: LogDetails) {
try {
const resp = await fetch(url, {
method: "HEAD",
headers: this.headers,
agent: this.resolveAgent,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} as any);
if (resp.status !== 200) {
logger.debug("HEAD response code != 200, loading in browser", {
status: resp.status,
...logDetails,
});
return true;
}
return this.isHTMLContentType(resp.headers.get("Content-Type"));
} catch (e) {
// can't confirm not html, so try in browser
logger.debug("HEAD request failed", { ...formatErr(e), ...logDetails });
return true;
}
}
isHTMLContentType(contentType: string | null) {
// just load if no content-type
if (!contentType) {
return true;
}
const mime = contentType.split(";")[0];
if (HTML_TYPES.includes(mime)) {
return true;
}
return false;
}
async parseSitemap({ url, sitemap }: ScopedSeed, seedId: number) {
if (!sitemap) {
return;
@@ -2441,7 +2384,7 @@ self.__bx_behaviors.selectMainBehavior();
generatedCombinedWarcs.push(combinedWarcName);
const warcBuffer = await this.createWARCInfo(combinedWarcName);
const warcBuffer = await createWARCInfo(combinedWarcName);
fh.write(warcBuffer);
}

@@ -99,9 +99,10 @@ function cliOpts(): { [key: string]: Options } {
default: getDefaultWindowSize(),
},
proxy: {
type: "boolean",
default: false,
proxyServer: {
describe:
"if set, will use specified proxy server. Takes precedence over any env var proxy settings",
type: "string",
},
cookieDays: {
@@ -179,7 +180,7 @@ async function main() {
headless: params.headless,
signals: false,
chromeOptions: {
proxy: false,
proxy: params.proxyServer,
extraArgs: [
"--window-position=0,0",
`--window-size=${params.windowSize}`,

@@ -545,6 +545,18 @@ class ArgParser {
default: "disabled",
},
proxyServer: {
describe:
"if set, will use specified proxy server. Takes precedence over any env var proxy settings",
type: "string",
},
dryRun: {
describe:
"If true, no archive data is written to disk, only pages and logs (and optionally saved state).",
type: "boolean",
},
qaSource: {
describe: "Required for QA mode. Source (WACZ or multi WACZ) for QA",
type: "string",

@@ -4,6 +4,8 @@ import { logger, formatErr } from "./logger.js";
import { HTTPRequest, Page } from "puppeteer-core";
import { Browser } from "./browser.js";
import { fetch } from "undici";
const RULE_TYPES = ["block", "allowOnly"];
const ALWAYS_ALLOW = ["https://pywb.proxy/", "http://pywb.proxy/"];

@@ -22,7 +22,7 @@ import { CDPSession, Target, Browser as PptrBrowser } from "puppeteer-core";
import { Recorder } from "./recorder.js";
type BtrixChromeOpts = {
proxy?: boolean;
proxy?: string;
userAgent?: string | null;
extraArgs?: string[];
};
@@ -115,7 +115,6 @@ export class Browser {
? undefined
: (target) => this.targetFilter(target),
};
await this._init(launchOpts, ondisconnect, recording);
}
@@ -217,7 +216,7 @@
}
chromeArgs({
proxy = true,
proxy = "",
userAgent = null,
extraArgs = [],
}: BtrixChromeOpts) {
@@ -236,11 +235,13 @@
...extraArgs,
];
if (proxy) {
logger.info("Using proxy", { proxy }, "browser");
}
if (proxy) {
args.push("--ignore-certificate-errors");
args.push(
`--proxy-server=http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`,
);
args.push(`--proxy-server=${proxy}`);
}
return args;
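To make the new pass-through concrete: a crawl started with --proxyServer socks5://proxy-host:1080 now hands that URL to the browser verbatim (a sketch, assuming an existing Browser instance; proxy-host is hypothetical):

// chromeArgs() with a proxy URL from --proxyServer or PROXY_SERVER:
const args = browser.chromeArgs({
  proxy: "socks5://proxy-host:1080", // hypothetical proxy URL
  userAgent: null,
  extraArgs: [],
});
// args now includes "--proxy-server=socks5://proxy-host:1080" rather than the
// old hardcoded pywb address http://${PROXY_HOST}:${PROXY_PORT}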

src/util/proxy.ts (new file)

@@ -0,0 +1,60 @@
import { Dispatcher, ProxyAgent, setGlobalDispatcher } from "undici";
import { socksDispatcher } from "fetch-socks";
import type { SocksProxyType } from "socks/typings/common/constants.js";
export function getEnvProxyUrl() {
if (process.env.PROXY_SERVER) {
return process.env.PROXY_SERVER;
}
// for backwards compatibility with 0.x proxy settings
if (process.env.PROXY_HOST && process.env.PROXY_PORT) {
return `http://${process.env.PROXY_HOST}:${process.env.PROXY_PORT}`;
}
return "";
}
export function initProxy(proxy?: string): string {
if (!proxy) {
proxy = getEnvProxyUrl();
}
if (proxy) {
const dispatcher = createDispatcher(proxy);
if (dispatcher) {
setGlobalDispatcher(dispatcher);
return proxy;
}
}
return "";
}
export function createDispatcher(proxyUrl: string): Dispatcher | undefined {
if (proxyUrl.startsWith("http://") || proxyUrl.startsWith("https://")) {
// HTTP PROXY does not support auth, as it's not supported in the browser
// so must drop username/password for consistency
const url = new URL(proxyUrl);
url.username = "";
url.password = "";
return new ProxyAgent({ uri: url.href });
} else if (
proxyUrl.startsWith("socks://") ||
proxyUrl.startsWith("socks5://") ||
proxyUrl.startsWith("socks4://")
) {
// support auth as SOCKS5 auth *is* supported in Brave (though not in Chromium)
const url = new URL(proxyUrl);
const type: SocksProxyType = url.protocol === "socks4:" ? 4 : 5;
const params = {
type,
host: url.hostname,
port: parseInt(url.port),
userId: url.username || undefined,
password: url.password || undefined,
};
return socksDispatcher(params);
} else {
return undefined;
}
}
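A usage sketch for the new module (not part of the diff): the crawler calls initProxy() once during bootstrap(), after which every undici fetch() in the process goes through the global dispatcher, and the same URL is handed to the browser via --proxy-server.

import { fetch } from "undici";
import { initProxy } from "./util/proxy.js";

// Hypothetical standalone usage; the proxy URL is illustrative.
// With no argument, initProxy() falls back to PROXY_SERVER,
// then to the legacy PROXY_HOST/PROXY_PORT pair.
const proxyUrl = initProxy("socks5://user:passw0rd@proxy-host:1080");

if (proxyUrl) {
  // This request is routed through the SOCKS5 dispatcher installed above.
  const resp = await fetch("https://example.com/");
  console.log(resp.status, "fetched via", proxyUrl);
}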

@@ -6,7 +6,7 @@ import PQueue from "p-queue";
import { logger, formatErr } from "./logger.js";
import { sleep, timedRun, timestampNow } from "./timing.js";
import { RequestResponseInfo } from "./reqresp.js";
import { RequestResponseInfo, isHTMLContentType } from "./reqresp.js";
import { fetch, Response } from "undici";
@@ -77,11 +77,23 @@ export type AsyncFetchOptions = {
filter?: (resp: Response) => boolean;
ignoreDupe?: boolean;
maxFetchSize?: number;
manualRedirect?: boolean;
};
// =================================================================
export type ResponseStreamAsyncFetchOptions = AsyncFetchOptions & {
export type DirectFetchRequest = {
url: string;
headers: Record<string, string>;
cdp: CDPSession;
};
// =================================================================
export type NetworkLoadAsyncFetchOptions = AsyncFetchOptions & {
cdp: CDPSession;
};
// =================================================================
export type ResponseStreamAsyncFetchOptions = NetworkLoadAsyncFetchOptions & {
requestId: string;
};
@@ -1068,12 +1080,23 @@ export class Recorder {
this.writer.writeRecordPair(responseRecord, requestRecord);
}
async directFetchCapture(
url: string,
): Promise<{ fetched: boolean; mime: string }> {
async directFetchCapture({ url, headers, cdp }: DirectFetchRequest): Promise<{
fetched: boolean;
mime: string;
ts: Date;
}> {
const reqresp = new RequestResponseInfo("0");
const ts = new Date();
const cookie = await this.getCookieString(cdp, url);
if (cookie) {
headers["Cookie"] = cookie;
}
reqresp.url = url;
reqresp.method = "GET";
reqresp.requestHeaders = headers;
reqresp.ts = ts;
logger.debug(
"Directly fetching page URL without browser",
@@ -1081,8 +1104,21 @@
"recorder",
);
const filter = (resp: Response) =>
resp.status === 200 && !resp.headers.get("set-cookie");
let mime: string = "";
const filter = (resp: Response) => {
// only direct load 200 responses
if (resp.status !== 200) {
return false;
}
const ct = resp.headers.get("content-type");
if (ct) {
mime = ct.split(";")[0];
}
return !isHTMLContentType(mime);
};
// ignore dupes: if previous URL was not a page, still load as page. if previous was page,
// should not get here, as dupe pages tracked via seen list
@@ -1093,16 +1129,28 @@
networkId: "0",
filter,
ignoreDupe: true,
manualRedirect: true,
});
const res = await fetcher.load();
const mime =
(reqresp.responseHeaders &&
reqresp.responseHeaders["content-type"] &&
reqresp.responseHeaders["content-type"].split(";")[0]) ||
"";
this.addPageRecord(reqresp);
return { fetched: res === "fetched", mime };
if (url === this.pageUrl && !this.pageInfo.ts) {
logger.debug("Setting page timestamp", { ts, url });
this.pageInfo.ts = ts;
}
return { fetched: res === "fetched", mime, ts };
}
async getCookieString(cdp: CDPSession, url: string) {
const cookieList: string[] = [];
const { cookies } = await cdp.send("Network.getCookies", { urls: [url] });
for (const { name, value } of cookies) {
cookieList.push(`${name}=${value}`);
}
return cookieList.join(";");
}
}
@@ -1121,6 +1169,8 @@ class AsyncFetcher {
tempdir: string;
filename: string;
manualRedirect = false;
constructor({
tempdir,
reqresp,
@@ -1130,6 +1180,7 @@ class AsyncFetcher {
filter = undefined,
ignoreDupe = false,
maxFetchSize = MAX_BROWSER_DEFAULT_FETCH_SIZE,
manualRedirect = false,
}: AsyncFetchOptions) {
this.reqresp = reqresp;
this.reqresp.expectedSize = expectedSize;
@@ -1148,6 +1199,8 @@
);
this.maxFetchSize = maxFetchSize;
this.manualRedirect = manualRedirect;
}
async load() {
@@ -1283,9 +1336,9 @@
reqresp.status = 0;
reqresp.errorText = e.message;
} finally {
recorder.addPageRecord(reqresp);
// exclude direct fetch request with fake id
if (networkId !== "0") {
recorder.addPageRecord(reqresp);
recorder.removeReqResp(networkId);
}
}
@@ -1313,6 +1366,7 @@
headers,
body: reqresp.postData || undefined,
signal,
redirect: this.manualRedirect ? "manual" : "follow",
});
if (this.filter && !this.filter(resp) && abort) {
@@ -1329,6 +1383,7 @@
}
if (reqresp.expectedSize === 0) {
reqresp.fillFetchResponse(resp);
reqresp.payload = new Uint8Array();
return;
} else if (!resp.body) {
@@ -1428,7 +1483,7 @@ class ResponseStreamAsyncFetcher extends AsyncFetcher {
class NetworkLoadStreamAsyncFetcher extends AsyncFetcher {
cdp: CDPSession;
constructor(opts: ResponseStreamAsyncFetchOptions) {
constructor(opts: NetworkLoadAsyncFetchOptions) {
super(opts);
this.cdp = opts.cdp;
}
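The reworked direct-fetch API reads most clearly from the caller's side; a sketch of how a worker now invokes it (see the worker.ts diff below; the URL is illustrative):

// Given a Recorder instance and the page's CDPSession:
const { fetched, mime, ts } = await recorder.directFetchCapture({
  url: "https://example.com/report.pdf", // illustrative non-HTML URL
  headers: this.headers, // crawler-wide headers; browser cookies are added via CDP
  cdp,
});
// fetched is true only for a 200, non-HTML response; redirects are no longer
// followed (manualRedirect), so a 3xx falls through to a normal browser load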

@@ -3,7 +3,7 @@ import { getStatusText } from "@webrecorder/wabac/src/utils.js";
import { Protocol } from "puppeteer-core";
import { postToGetUrl } from "warcio";
import { HTML_TYPES } from "./constants.js";
import { Response } from "undici";
const CONTENT_LENGTH = "content-length";
@@ -150,10 +150,15 @@ export class RequestResponseInfo {
}
}
isRedirectStatus() {
return this.status >= 300 && this.status < 400 && this.status !== 304;
}
isSelfRedirect() {
if (this.status < 300 || this.status >= 400 || this.status === 304) {
if (!this.isRedirectStatus()) {
return false;
}
try {
const headers = new Headers(this.getResponseHeadersDict());
const location = headers.get("location") || "";
@@ -365,3 +370,18 @@ export class RequestResponseInfo {
return value.replace(/\n/g, ", ");
}
}
export function isHTMLContentType(contentType: string | null) {
// just load if no content-type
if (!contentType) {
return true;
}
const mime = contentType.split(";")[0];
if (HTML_TYPES.includes(mime)) {
return true;
}
return false;
}
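A few illustrative calls, based on the behavior above (HTML_TYPES includes "text/html"):

isHTMLContentType("text/html; charset=utf-8"); // true: charset parameter is stripped first
isHTMLContentType("application/pdf"); // false: candidate for direct fetch
isHTMLContentType(null); // true: no content-type, so load in the browser to be safe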

@@ -9,6 +9,8 @@ import { logger, formatErr } from "./logger.js";
import { DETECT_SITEMAP } from "./constants.js";
import { sleep } from "./timing.js";
import { fetch, Response } from "undici";
const SITEMAP_CONCURRENCY = 5;
const TEXT_CONTENT_TYPE = ["text/plain"];
@@ -237,7 +239,8 @@ export class SitemapReader extends EventEmitter {
resp.headers.get("content-encoding") !== "gzip"
) {
const ds = new DecompressionStream("gzip");
stream = body.pipeThrough(ds);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
stream = body.pipeThrough(ds as any);
} else {
stream = body;
}

@@ -66,7 +66,7 @@ export class PageState {
callbacks: PageCallbacks = {};
isHTMLPage?: boolean;
isHTMLPage = true;
text?: string;
screenshotView?: Buffer;
favicon?: string;

@@ -202,6 +202,7 @@ export async function getDirSize(dir: string) {
}
export async function checkDiskUtilization(
collDir: string,
// TODO: Fix this the next time the file is edited.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
params: Record<string, any>,
@@ -209,7 +210,7 @@
dfOutput = null,
) {
const diskUsage: Record<string, string> = await getDiskUsage(
"/crawls",
collDir,
dfOutput,
);
const usedPercentage = parseInt(diskUsage["Use%"].slice(0, -1));

@@ -11,6 +11,8 @@ import PQueue from "p-queue";
const DEFAULT_ROLLOVER_SIZE = 1_000_000_000;
let warcInfo = {};
export type ResourceRecordData = {
buffer: Uint8Array;
resourceType: string;
@@ -117,6 +119,8 @@ export class WARCWriter implements IndexerOffsetLength {
);
}
fh.write(await createWARCInfo(this.filename));
return fh;
}
@@ -310,6 +314,33 @@
}
}
// =================================================================
export function setWARCInfo(
software: string,
otherParams?: Record<string, string>,
) {
warcInfo = {
software,
format: "WARC File Format 1.1",
...otherParams,
};
}
// =================================================================
export async function createWARCInfo(filename: string) {
const warcVersion = "WARC/1.1";
const type = "warcinfo";
const record = await WARCRecord.createWARCInfo(
{ filename, type, warcVersion },
warcInfo,
);
const buffer = await WARCSerializer.serialize(record, {
gzip: true,
});
return buffer;
}
// =================================================================
export function streamFinish(fh: Writable) {
const p = new Promise<void>((resolve) => {
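To tie the two new module-level helpers together, a minimal sketch of the intended call order (the version string, extra field, and filename are illustrative; in the crawler, setWARCInfo() is called once from bootstrap() with getInfoString() and the --warcInfo params, as shown in the crawler.ts diff above):

import { setWARCInfo, createWARCInfo } from "./util/warcwriter.js";

// Once at startup: record the software string plus user-supplied warcinfo fields.
setWARCInfo("Browsertrix-Crawler 1.2.0-beta.1 (with warcio.js 2.2.1)", {
  operator: "test", // hypothetical extra field, as exercised in warcinfo.test.js
});

// Later, once per WARC file: a gzipped warcinfo record naming that file.
const buffer = await createWARCInfo("rec-worker0.warc.gz");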

@@ -2,7 +2,7 @@ import os from "os";
import { logger, formatErr } from "./logger.js";
import { sleep, timedRun } from "./timing.js";
import { Recorder } from "./recorder.js";
import { DirectFetchRequest, Recorder } from "./recorder.js";
import { rxEscape } from "./seeds.js";
import { CDPSession, Page } from "puppeteer-core";
import { PageState, WorkerId } from "./state.js";
@@ -20,8 +20,10 @@ export type WorkerOpts = {
workerid: WorkerId;
// eslint-disable-next-line @typescript-eslint/ban-types
callbacks: Record<string, Function>;
directFetchCapture?:
| ((url: string) => Promise<{ fetched: boolean; mime: string }>)
directFetchCapture:
| ((
request: DirectFetchRequest,
) => Promise<{ fetched: boolean; mime: string; ts: Date }>)
| null;
frameIdToExecId: Map<string, number>;
};
@@ -171,7 +173,7 @@ export class PageWorker {
this.cdp = cdp;
this.callbacks = {};
const directFetchCapture = this.recorder
? (x: string) => this.recorder!.directFetchCapture(x)
? (req: DirectFetchRequest) => this.recorder!.directFetchCapture(req)
: null;
this.opts = {
page,

tests/dryrun.test.js (new file)

@@ -0,0 +1,18 @@
import child_process from "child_process";
import fs from "fs";
test("ensure dryRun crawl only writes pages and logs", async () => {
child_process.execSync(
'docker run -v $PWD/test-crawls:/crawls webrecorder/browsertrix-crawler crawl --url https://webrecorder.net/ --generateWACZ --text --collection dry-run-wr-net --combineWARC --rolloverSize 10000 --limit 2 --title "test title" --description "test description" --warcPrefix custom-prefix --dryRun',
);
const files = fs.readdirSync("test-crawls/collections/dry-run-wr-net").sort();
expect(files.length).toBe(2);
expect(files[0]).toBe("logs");
expect(files[1]).toBe("pages");
});

@@ -3,7 +3,7 @@ import fs from "fs";
import path from "path";
import { WARCParser } from "warcio";
const PDF = "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf";
const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";
test("ensure pdf is crawled", async () => {
child_process.execSync(

tests/proxy.test.js (new file)

@@ -0,0 +1,127 @@
import { execSync, exec } from "child_process";
const sleep = (ms) => new Promise((res) => setTimeout(res, ms));
const PROXY_IMAGE = "tarampampam/3proxy:1.9.1";
const SOCKS_PORT = "1080";
const HTTP_PORT = "3128";
const WRONG_PORT = "33130";
const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";
const HTML = "https://webrecorder.net/";
const extraArgs = "--limit 1 --failOnFailedSeed --timeout 10 --logging debug";
let proxyAuthId;
let proxyNoAuthId;
beforeAll(() => {
execSync("docker network create proxy-test-net");
proxyAuthId = execSync(`docker run -e PROXY_LOGIN=user -e PROXY_PASSWORD=passw0rd -d --rm --network=proxy-test-net --name proxy-with-auth ${PROXY_IMAGE}`, {encoding: "utf-8"});
proxyNoAuthId = execSync(`docker run -d --rm --network=proxy-test-net --name proxy-no-auth ${PROXY_IMAGE}`, {encoding: "utf-8"});
});
afterAll(async () => {
execSync(`docker kill -s SIGINT ${proxyAuthId}`);
execSync(`docker kill -s SIGINT ${proxyNoAuthId}`);
await sleep(3000);
execSync("docker network rm proxy-test-net");
});
describe("socks5 + https proxy tests", () => {
for (const scheme of ["socks5", "http"]) {
const port = scheme === "socks5" ? SOCKS_PORT : HTTP_PORT;
for (const type of ["HTML page", "PDF"]) {
const url = type === "PDF" ? PDF : HTML;
test(`${scheme} proxy, ${type}, no auth`, () => {
let status = 0;
try {
execSync(`docker run -e PROXY_SERVER=${scheme}://proxy-no-auth:${port} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${url} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(0);
});
test(`${scheme} proxy, ${type}, with auth`, () => {
let status = 0;
try {
execSync(`docker run -e PROXY_SERVER=${scheme}://user:passw0rd@proxy-with-auth:${port} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${url} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
// auth supported only for SOCKS5
expect(status).toBe(scheme === "socks5" ? 0 : 1);
});
test(`${scheme} proxy, ${type}, wrong auth`, () => {
let status = 0;
try {
execSync(`docker run -e PROXY_SERVER=${scheme}://user:passw1rd@proxy-with-auth:${port} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${url} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(1);
});
test(`${scheme} proxy, ${type}, wrong protocol`, () => {
let status = 0;
try {
execSync(`docker run -e PROXY_SERVER=${scheme}://user:passw1rd@proxy-with-auth:${scheme === "socks5" ? HTTP_PORT : SOCKS_PORT} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${url} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(1);
});
}
test(`${scheme} proxy, proxy missing error`, () => {
let status = 0;
try {
execSync(`docker run -e PROXY_SERVER=${scheme}://proxy-no-auth:${WRONG_PORT} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${HTML} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(1);
});
}
});
test("http proxy, PDF, separate env vars", () => {
execSync(`docker run -e PROXY_HOST=proxy-no-auth -e PROXY_PORT=${HTTP_PORT} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${PDF} ${extraArgs}`, {encoding: "utf-8"});
});
test("http proxy set, but not running, separate env vars", () => {
let status = 0;
try {
execSync(`docker run -e PROXY_HOST=proxy-no-auth -e PROXY_PORT=${WRONG_PORT} --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --url ${PDF} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(1);
});
test("http proxy set, but not running, cli arg", () => {
let status = 0;
try {
execSync(`docker run --rm --network=proxy-test-net webrecorder/browsertrix-crawler crawl --proxyServer http://proxy-no-auth:${WRONG_PORT} --url ${PDF} ${extraArgs}`, {encoding: "utf-8"});
} catch (e) {
status = e.status;
}
expect(status).toBe(1);
});

@@ -29,6 +29,7 @@ grpcfuse 1000000 285000 715000 28% /crawls`;
// with combineWARC + generateWACZ, projected is 285k + 4 * 5k = 310k = 31%
// does not exceed 90% threshold
const returnValue = await checkDiskUtilization(
'/crawls',
params,
5000 * 1024,
mockDfOutput,
@@ -55,6 +56,7 @@ grpcfuse 100000 85000 15000 85% /crawls`;
// with generateWACZ, projected is 85k + 3k x 2 = 91k = 91%
// exceeds 90% threshold
const returnValue = await checkDiskUtilization(
'/crawls',
params,
3000 * 1024,
mockDfOutput,

@@ -1,8 +1,11 @@
import fs from "fs";
import zlib from "zlib";
import path from "path";
import child_process from "child_process";
test("check that the warcinfo file works as expected on the command line", async () => {
test("run crawl", async() => {
let success = false;
try {
const configYaml = fs.readFileSync("tests/fixtures/crawl-2.yaml", "utf8");
const proc = child_process.execSync(
@@ -11,10 +14,42 @@ test("check that the warcinfo file works as expected on the command line", async
);
console.log(proc);
success = true;
} catch (error) {
console.log(error);
}
expect(success).toBe(true);
});
test("check that the warcinfo for individual WARC is as expected", async () => {
const warcs = fs.readdirSync("test-crawls/collections/warcinfo/archive/");
let filename = "";
for (const name of warcs) {
if (name.startsWith("rec-")) {
filename = path.join("test-crawls/collections/warcinfo/archive/", name);
break;
}
}
const warcData = fs.readFileSync(filename);
const data = zlib.gunzipSync(warcData);
const string = data.toString("utf8");
expect(string.indexOf("operator: test")).toBeGreaterThan(-1);
expect(string.indexOf("host: hostname")).toBeGreaterThan(-1);
expect(
string.match(/Browsertrix-Crawler \d[\w.-]+ \(with warcio.js \d[\w.-]+\)/),
).not.toEqual(null);
expect(string.indexOf("format: WARC File Format 1.1")).toBeGreaterThan(-1);
});
test("check that the warcinfo for combined WARC file is as expected", async () => {
const warcData = fs.readFileSync(
"test-crawls/collections/warcinfo/warcinfo_0.warc.gz",
);

@@ -2386,6 +2386,14 @@ fd-slicer@~1.1.0:
dependencies:
pend "~1.2.0"
fetch-socks@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/fetch-socks/-/fetch-socks-1.3.0.tgz#1f07b26924b5e7370aa23fd6e9332a5863736d1b"
integrity sha512-Cq7O53hoNiVeOs6u54f8M/H/w2yzhmnTQ3tcAJj9FNKYOeNGmt8qNU1zpWOzJD09f0uqfmBXxLbzWPsnT6GcRw==
dependencies:
socks "^2.8.1"
undici "^6.10.1"
file-entry-cache@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027"
@@ -2778,6 +2786,14 @@ ioredis@^5.3.2:
redis-parser "^3.0.0"
standard-as-callback "^2.1.0"
ip-address@^9.0.5:
version "9.0.5"
resolved "https://registry.yarnpkg.com/ip-address/-/ip-address-9.0.5.tgz#117a960819b08780c3bd1f14ef3c1cc1d3f3ea5a"
integrity sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==
dependencies:
jsbn "1.1.0"
sprintf-js "^1.1.3"
ip@^1.1.8:
version "1.1.8"
resolved "https://registry.yarnpkg.com/ip/-/ip-1.1.8.tgz#ae05948f6b075435ed3307acce04629da8cdbf48"
@@ -3427,6 +3443,11 @@ js-yaml@^4.1.0:
dependencies:
argparse "^2.0.1"
jsbn@1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/jsbn/-/jsbn-1.1.0.tgz#b01307cb29b618a1ed26ec79e911f803c4da0040"
integrity sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==
jsesc@^2.5.1:
version "2.5.2"
resolved "https://registry.yarnpkg.com/jsesc/-/jsesc-2.5.2.tgz#80564d2e483dacf6e8ef209650a67df3f0c283a4"
@@ -4437,6 +4458,14 @@ socks@^2.7.1:
ip "^2.0.0"
smart-buffer "^4.2.0"
socks@^2.8.1:
version "2.8.3"
resolved "https://registry.yarnpkg.com/socks/-/socks-2.8.3.tgz#1ebd0f09c52ba95a09750afe3f3f9f724a800cb5"
integrity sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==
dependencies:
ip-address "^9.0.5"
smart-buffer "^4.2.0"
source-map-support@0.5.13:
version "0.5.13"
resolved "https://registry.yarnpkg.com/source-map-support/-/source-map-support-0.5.13.tgz#31b24a9c2e73c2de85066c0feb7d44767ed52932"
@@ -4455,6 +4484,11 @@ split-on-first@^1.0.0:
resolved "https://registry.yarnpkg.com/split-on-first/-/split-on-first-1.1.0.tgz#f610afeee3b12bce1d0c30425e76398b78249a5f"
integrity sha512-43ZssAJaMusuKWL8sKUBQXHWOpq8d6CfN/u1p4gUzfJkM05C8rxTmYrkIPTXapZpORA6LkkzcUulJ8FqA7Uudw==
sprintf-js@^1.1.3:
version "1.1.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.1.3.tgz#4914b903a2f8b685d17fdf78a70e917e872e444a"
integrity sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==
sprintf-js@~1.0.2:
version "1.0.3"
resolved "https://registry.yarnpkg.com/sprintf-js/-/sprintf-js-1.0.3.tgz#04e6926f662895354f3dd015203633b857297e2c"
@@ -4842,7 +4876,7 @@ undici-types@~5.25.1:
resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.25.3.tgz#e044115914c85f0bcbb229f346ab739f064998c3"
integrity sha512-Ga1jfYwRn7+cP9v8auvEXN1rX3sWqlayd4HP7OKk4mZWylEmu3KzXDUGrQUN6Ol7qo1gPvB2e5gX6udnyEPgdA==
undici@^6.18.2:
undici@^6.10.1, undici@^6.18.2:
version "6.18.2"
resolved "https://registry.yarnpkg.com/undici/-/undici-6.18.2.tgz#f662a5dc33cf654fc412a9912e5a07b138d75c97"
integrity sha512-o/MQLTwRm9IVhOqhZ0NQ9oXax1ygPjw6Vs+Vq/4QRjbOAC3B1GCHy7TYxxbExKlb7bzDRzt9vBWU6BDz0RFfYg==