mirror of
https://github.com/webrecorder/browsertrix-crawler.git
synced 2025-10-19 06:23:16 +00:00
Improved support for running as non-root (#503)
This PR provides improved support for running the crawler as non-root, matching the user to the uid/gid of the crawl volume. This fixes #502, the initial regression from 0.12.4, where `chmod u+x` was used instead of `chmod a+x` on the node binary files. However, that alone was not enough to fully support signal handling / graceful shutdown equivalent to running as the same user. To make the "running as a different user" path work the same way:
- need to switch to `gosu` instead of `su` (`gosu` was added in the Brave 1.64.109 image)
- run all child processes as detached (redis-server, socat, wacz, etc.) to avoid them automatically being killed via SIGINT/SIGTERM
- running detached is controlled via the `DETACHED_CHILD_PROC=1` env variable, set to 1 by default in the Dockerfile (to allow for overrides just in case)

A test has been added which runs one of the tests with a non-root `test-crawls` directory to test the different-user path. The test (saved-state.test.js) includes sending interrupt signals and graceful shutdown, and allows testing of those features for a non-root gosu execution. Also bumping the crawler version to 1.0.1.
This commit is contained in:
parent
5e2768ebcf
commit
1fe810b1df
5 changed files with 35 additions and 21 deletions
5
.github/workflows/ci.yaml
vendored
5
.github/workflows/ci.yaml
vendored
|
@ -44,3 +44,8 @@ jobs:
|
|||
run: docker-compose build
|
||||
- name: run jest
|
||||
run: sudo yarn test
|
||||
- name: run saved state test with volume owned by different user
|
||||
run: |
|
||||
sudo rm -rf ./test-crawls
|
||||
mkdir test-crawls
|
||||
sudo yarn test ./tests/saved-state.test.js
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
ARG BROWSER_VERSION=1.62.165
|
||||
ARG BROWSER_VERSION=1.64.109
|
||||
ARG BROWSER_IMAGE_BASE=webrecorder/browsertrix-browser-base:brave-${BROWSER_VERSION}
|
||||
|
||||
FROM ${BROWSER_IMAGE_BASE}
|
||||
|
@ -15,7 +15,8 @@ ENV PROXY_HOST=localhost \
|
|||
BROWSER_VERSION=${BROWSER_VERSION} \
|
||||
BROWSER_BIN=google-chrome \
|
||||
OPENSSL_CONF=/app/openssl.conf \
|
||||
VNC_PASS=vncpassw0rd!
|
||||
VNC_PASS=vncpassw0rd! \
|
||||
DETACHED_CHILD_PROC=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
|
@ -47,7 +48,7 @@ ADD config/ /app/
|
|||
|
||||
ADD html/ /app/html/
|
||||
|
||||
RUN chmod u+x /app/dist/main.js /app/dist/create-login-profile.js
|
||||
RUN chmod a+x /app/dist/main.js /app/dist/create-login-profile.js
|
||||
|
||||
RUN ln -s /app/dist/main.js /usr/bin/crawl; ln -s /app/dist/create-login-profile.js /usr/bin/create-login-profile
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ if [ "$MY_GID" != "$VOLUME_GID" ] || [ "$MY_UID" != "$VOLUME_UID" ]; then
|
|||
useradd -ms /bin/bash -g $VOLUME_GID btrix
|
||||
usermod -o -u $VOLUME_UID btrix > /dev/null
|
||||
|
||||
su btrix -c '"$@"' -- argv0-ignore "$@"
|
||||
exec gosu btrix:btrix "$@"
|
||||
else
|
||||
exec "$@"
|
||||
fi
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "browsertrix-crawler",
|
||||
"version": "1.0.0",
|
||||
"version": "1.0.1",
|
||||
"main": "browsertrix-crawler",
|
||||
"type": "module",
|
||||
"repository": "https://github.com/webrecorder/browsertrix-crawler",
|
||||
|
|
|
@ -73,6 +73,8 @@ const FETCH_TIMEOUT_SECS = 30;
|
|||
const PAGE_OP_TIMEOUT_SECS = 5;
|
||||
const SITEMAP_INITIAL_FETCH_TIMEOUT_SECS = 30;
|
||||
|
||||
const RUN_DETACHED = process.env.DETACHED_CHILD_PROC == "1";
|
||||
|
||||
const POST_CRAWL_STATES = [
|
||||
"generate-wacz",
|
||||
"uploading-wacz",
|
||||
|
@ -412,6 +414,7 @@ export class Crawler {
|
|||
return child_process.spawn("redis-server", redisArgs, {
|
||||
cwd: "/tmp/",
|
||||
stdio: redisStdio,
|
||||
detached: RUN_DETACHED,
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -462,23 +465,28 @@ export class Crawler {
|
|||
}
|
||||
});
|
||||
|
||||
child_process.spawn("socat", [
|
||||
"tcp-listen:9222,reuseaddr,fork",
|
||||
"tcp:localhost:9221",
|
||||
]);
|
||||
child_process.spawn(
|
||||
"socat",
|
||||
["tcp-listen:9222,reuseaddr,fork", "tcp:localhost:9221"],
|
||||
{ detached: RUN_DETACHED },
|
||||
);
|
||||
|
||||
if (!this.params.headless && !process.env.NO_XVFB) {
|
||||
child_process.spawn("Xvfb", [
|
||||
process.env.DISPLAY || "",
|
||||
"-listen",
|
||||
"tcp",
|
||||
"-screen",
|
||||
"0",
|
||||
process.env.GEOMETRY || "",
|
||||
"-ac",
|
||||
"+extension",
|
||||
"RANDR",
|
||||
]);
|
||||
child_process.spawn(
|
||||
"Xvfb",
|
||||
[
|
||||
process.env.DISPLAY || "",
|
||||
"-listen",
|
||||
"tcp",
|
||||
"-screen",
|
||||
"0",
|
||||
process.env.GEOMETRY || "",
|
||||
"-ac",
|
||||
"+extension",
|
||||
"RANDR",
|
||||
],
|
||||
{ detached: RUN_DETACHED },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1427,7 +1435,7 @@ self.__bx_behaviors.selectMainBehavior();
|
|||
|
||||
// create WACZ
|
||||
const waczResult = await this.awaitProcess(
|
||||
child_process.spawn("wacz", createArgs),
|
||||
child_process.spawn("wacz", createArgs, { detached: RUN_DETACHED }),
|
||||
);
|
||||
|
||||
if (waczResult !== 0) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue