cleanup dockerfile + fix test (#595)

- remove obsolete line from Dockerfile
- fix pdf test to use a webrecorder-hosted pdf
2025-12-08 06:09:48 +00:00 · 2024-06-06 12:14:44 -07:00 · 2024-06-06 12:14:44 -07:00 · 1bd94d93a1
commit 1bd94d93a1
parent 068ee79288
2 changed files with 1 additions and 4 deletions
--- a/3
+++ b/3
@ -28,9 +28,6 @@ ADD package.json /app/
 # to allow forcing rebuilds from this stage
 ARG REBUILD

-# Prefetch tldextract so pywb is able to boot in environments with limited internet access
-RUN tldextract --update
-
 # Download and format ad host blocklist as JSON
 RUN mkdir -p /tmp/ads && cd /tmp/ads && \
    curl -vs -o ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
--- a/tests/pdf-crawl.test.js
+++ b/tests/pdf-crawl.test.js
@ -3,7 +3,7 @@ import fs from "fs";
 import path from "path";
 import { WARCParser } from "warcio";

-const PDF = "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf";
+const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";

 test("ensure pdf is crawled", async () => {
  child_process.execSync(