mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00

* Create an argument parser class * move constants, arg parser to separate files in utils/* * ensure yaml config overridden by command-line args * yaml loading work: - simplify yaml config by using yargs.config option - move all option parsing to argParser, simply expose parseArgs - export constants directly - add lint to util/* files * support inline 'seeds' in cmdline and yaml config tests: - add test for crawl config, ensuring seeds crawled + wacz created - add test to ensure cmdline overrides yaml config * scope fix: empty scope implies only fixed list, use '.*' for any scope * lint fix * update readme with yaml config info * allow 'url' and 'seeds' if both provided Co-authored-by: Emma Dickson <emmadickson@Emmas-MacBook-Pro.local> Co-authored-by: emmadickson <emma.dickson@artsymail.com>
53 lines
1.2 KiB
Docker
53 lines
1.2 KiB
Docker
# Browser image tag to pull; declared before the first FROM so it can be used
# in the FROM line below. Override with --build-arg BROWSER_VERSION=<tag>.
ARG BROWSER_VERSION=90

# Donor stage: only used as a source for `COPY --from=chrome` later in the file.
# Keyword casing (`AS`) matches `FROM`, as expected by Docker build checks.
FROM oldwebtoday/chrome:${BROWSER_VERSION} AS chrome

# Base of the final image; the tag selects Python 3.8 together with Node.js 14.
FROM nikolaik/python-nodejs:python3.8-nodejs14
|
|
|
|
# Download the Yarn APT repository signing key and add it to apt's trusted keys.
# NOTE(review): presumably required so that `apt-get update` accepts a Yarn
# repository configured in the base image — confirm against the base image.
RUN curl --silent --show-error https://dl.yarnpkg.com/debian/pubkey.gpg \
    | apt-key add -
|
|
|
|
# Install OS-level packages (fonts, locales, Redis server, Xvfb) without
# recommended extras, then drop the apt caches and package lists in the same
# layer so they do not add to the image size.
RUN apt-get update -y \
    && apt-get install --no-install-recommends -qqy \
        fonts-stix \
        locales-all \
        redis-server \
        xvfb \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Re-declare the build argument inside this stage: an ARG defined before FROM
# is only visible to FROM lines unless it is declared again after FROM.
ARG BROWSER_VERSION

# Environment variables persisted into the running container:
#   PROXY_*          - proxy host/port and CA certificate URL/file path
#   DISPLAY/GEOMETRY - X display number and screen geometry
#                      (presumably consumed when starting Xvfb — confirm)
#   BROWSER_VERSION  - the build argument above, exposed at runtime
ENV PROXY_HOST=localhost \
    PROXY_PORT=8080 \
    PROXY_CA_URL=http://wsgiprox/download/pem \
    PROXY_CA_FILE=/tmp/proxy-ca.pem \
    DISPLAY=:99 \
    GEOMETRY=1360x1020x16 \
    BROWSER_VERSION=${BROWSER_VERSION}
|
|
|
|
# Pull the browser .deb packages and the Flash plugin out of the "chrome" stage.
COPY --from=chrome /tmp/*.deb /deb/
COPY --from=chrome /app/libpepflashplayer.so /app/libpepflashplayer.so

# Install the copied packages. The first commands are joined with `;` rather
# than `&&` so the build continues when `dpkg -i` exits non-zero (e.g. because
# of unmet dependencies); `apt-get install -f` then fixes up the installation.
# The package lists fetched by `apt-get update` are removed in the same layer
# (the path is /var/lib/apt/lists).
RUN dpkg -i /deb/*.deb; apt-get update; apt-get install -fqqy \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# All following relative paths (and the pip/yarn installs) resolve under /app.
WORKDIR /app

# Copy only the Python dependency manifest first so this layer and the install
# below stay cached until requirements.txt itself changes.
# COPY is used because this is a plain local file (no archive extraction or
# remote fetch is needed).
COPY requirements.txt /app/

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
|
# Copy only the Node.js manifest so dependency installation is cached
# independently of application source changes. COPY is used because this is a
# plain local file.
COPY package.json /app/

# Allows forcing a rebuild from this point on: passing a different value with
# --build-arg REBUILD=<value> invalidates the cache for the RUN steps below.
ARG REBUILD

# Install the Node.js dependencies declared in package.json.
RUN yarn install
|
|
|
|
# Application files, copied after dependency installation so that source edits
# do not invalidate the dependency layers. COPY is used throughout: all sources
# are plain local files or directories.
COPY uwsgi.ini /app/
COPY *.js /app/
COPY util/*.js /app/util/
COPY config.yaml /app/
COPY screencast/ /app/screencast/
|
|
|
|
# Expose the two entry-point scripts as commands under /usr/bin:
#   crawl                -> /app/main.js
#   create-login-profile -> /app/create-login-profile.js
# Both links are created in a single layer since they form one logical step.
RUN ln -s /app/main.js /usr/bin/crawl \
    && ln -s /app/create-login-profile.js /usr/bin/create-login-profile
|
|
|
|
# Containers start in /crawls (created by WORKDIR if it does not exist).
WORKDIR /crawls

# Default command in exec form: runs `crawl`, which is looked up on PATH
# (a symlink with that name is created under /usr/bin earlier in this file).
CMD ["crawl"]
|
|
|