mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 14:33:17 +00:00
cleanup dockerfile + fix test (#595)
- remove obsolete line from Dockerfile
- fix pdf test to use webrecorder-hosted pdf
This commit is contained in:
parent
068ee79288
commit
1bd94d93a1
2 changed files with 1 addition and 4 deletions
|
@@ -28,9 +28,6 @@ ADD package.json /app/
|
|||
# to allow forcing rebuilds from this stage
|
||||
ARG REBUILD
|
||||
|
||||
# Prefetch tldextract so pywb is able to boot in environments with limited internet access
|
||||
RUN tldextract --update
|
||||
|
||||
# Download and format ad host blocklist as JSON
|
||||
RUN mkdir -p /tmp/ads && cd /tmp/ads && \
|
||||
curl -vs -o ad-hosts.txt https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
|
||||
|
|
|
@@ -3,7 +3,7 @@ import fs from "fs";
|
|||
import path from "path";
|
||||
import { WARCParser } from "warcio";
|
||||
|
||||
const PDF = "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf";
|
||||
const PDF = "https://specs.webrecorder.net/wacz/1.1.1/wacz-2021.pdf";
|
||||
|
||||
test("ensure pdf is crawled", async () => {
|
||||
child_process.execSync(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue