2015-06-03 10:50:39 -07:00
|
|
|
// Copyright 2015 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2021-02-19 18:35:10 -05:00
|
|
|
//go:build linux
|
2015-06-03 10:50:39 -07:00
|
|
|
|
|
|
|
|
package syscall_test
|
|
|
|
|
|
|
|
|
|
import (
|
syscall: add CgroupFD support for ForkExec on Linux
Implement CLONE_INTO_CGROUP feature, allowing to put a child in a
specified cgroup in a clean and simple way. Note that the feature only
works for cgroup v2, and requires Linux kernel 5.7 or newer.
Using the feature requires a new syscall, clone3. Currently this is the
only reason to use clone3, but the code is structured in a way so that
other cases may be easily added in the future.
Add a test case.
While at it, try to simplify the syscall calling code in
forkAndExecInChild1, which became complicated over time because:
1. It was using either rawVforkSyscall or RawSyscall6 depending on
whether CLONE_NEWUSER was set.
2. On Linux/s390, the first two arguments to clone(2) system call are
swapped (which deserved a mention in Linux ABI hall of shame). It
was worked around in rawVforkSyscall on s390, but had to be
implemented via a switch/case when using RawSyscall6, making the code
less clear.
Let's
- modify rawVforkSyscall to have two arguments (which is also required
for clone3);
- remove the arguments workaround from s390 asm, instead implementing
arguments swap in the caller (which still looks ugly but at least
it's done once and is clearly documented now);
- use rawVforkSyscall for all cases (since it is essentially similar to
RawSyscall6, except for having less parameters, not returning r2, and
saving/restoring the return address before/after syscall on 386 and
amd64).
Updates #51246.
Change-Id: Ifcd418ebead9257177338ffbcccd0bdecb94474e
Reviewed-on: https://go-review.googlesource.com/c/go/+/417695
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2022-07-14 21:18:15 -07:00
|
|
|
"bytes"
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWNS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
"flag"
|
|
|
|
|
"fmt"
|
2017-07-12 21:31:30 +00:00
|
|
|
"internal/testenv"
|
2017-05-17 02:05:32 -07:00
|
|
|
"io"
|
2015-06-03 10:50:39 -07:00
|
|
|
"os"
|
|
|
|
|
"os/exec"
|
2017-05-17 02:05:32 -07:00
|
|
|
"os/user"
|
syscall: add CgroupFD support for ForkExec on Linux
Implement CLONE_INTO_CGROUP feature, allowing to put a child in a
specified cgroup in a clean and simple way. Note that the feature only
works for cgroup v2, and requires Linux kernel 5.7 or newer.
Using the feature requires a new syscall, clone3. Currently this is the
only reason to use clone3, but the code is structured in a way so that
other cases may be easily added in the future.
Add a test case.
While at it, try to simplify the syscall calling code in
forkAndExecInChild1, which became complicated over time because:
1. It was using either rawVforkSyscall or RawSyscall6 depending on
whether CLONE_NEWUSER was set.
2. On Linux/s390, the first two arguments to clone(2) system call are
swapped (which deserved a mention in Linux ABI hall of shame). It
was worked around in rawVforkSyscall on s390, but had to be
implemented via a switch/case when using RawSyscall6, making the code
less clear.
Let's
- modify rawVforkSyscall to have two arguments (which is also required
for clone3);
- remove the arguments workaround from s390 asm, instead implementing
arguments swap in the caller (which still looks ugly but at least
it's done once and is clearly documented now);
- use rawVforkSyscall for all cases (since it is essentially similar to
RawSyscall6, except for having less parameters, not returning r2, and
saving/restoring the return address before/after syscall on 386 and
amd64).
Updates #51246.
Change-Id: Ifcd418ebead9257177338ffbcccd0bdecb94474e
Reviewed-on: https://go-review.googlesource.com/c/go/+/417695
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2022-07-14 21:18:15 -07:00
|
|
|
"path"
|
2017-04-24 16:09:24 -07:00
|
|
|
"path/filepath"
|
2018-08-29 03:24:13 +00:00
|
|
|
"runtime"
|
2017-05-17 02:05:32 -07:00
|
|
|
"strconv"
|
2015-06-03 10:50:39 -07:00
|
|
|
"strings"
|
|
|
|
|
"syscall"
|
|
|
|
|
"testing"
|
2017-05-17 02:05:32 -07:00
|
|
|
"unsafe"
|
2015-06-03 10:50:39 -07:00
|
|
|
)
|
|
|
|
|
|
2017-08-23 11:49:22 -07:00
|
|
|
// isDocker reports whether /.dockerenv can be stat'ed, which this file
// treats as the marker for running inside a Docker container.
func isDocker() bool {
	if _, statErr := os.Stat("/.dockerenv"); statErr != nil {
		return false
	}
	return true
}
|
|
|
|
|
|
|
|
|
|
// isLXC reports whether the "container" environment variable is exactly
// "lxc".
func isLXC() bool {
	container := os.Getenv("container")
	return container == "lxc"
}
|
|
|
|
|
|
|
|
|
|
func skipInContainer(t *testing.T) {
|
2019-10-17 16:30:49 +00:00
|
|
|
// TODO: the callers of this func are using this func to skip
|
|
|
|
|
// tests when running as some sort of "fake root" that's uid 0
|
|
|
|
|
// but lacks certain Linux capabilities. Most of the Go builds
|
|
|
|
|
// run in privileged containers, though, where root is much
|
|
|
|
|
// closer (if not identical) to the real root. We should test
|
|
|
|
|
// for what we need exactly (which capabilities are active?),
|
|
|
|
|
// instead of just assuming "docker == bad". Then we'd get more test
|
|
|
|
|
// coverage on a bunch of builders too.
|
2017-08-23 11:49:22 -07:00
|
|
|
if isDocker() {
|
|
|
|
|
t.Skip("skip this test in Docker container")
|
|
|
|
|
}
|
|
|
|
|
if isLXC() {
|
|
|
|
|
t.Skip("skip this test in LXC container")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-17 09:05:02 +02:00
|
|
|
// skipNoUserNamespaces stats /proc/self/ns/user. A missing file or a
// permission error skips the calling test; any other stat error fails it.
func skipNoUserNamespaces(t *testing.T) {
	_, err := os.Stat("/proc/self/ns/user")
	if err == nil {
		return
	}
	switch {
	case os.IsNotExist(err):
		t.Skip("kernel doesn't support user namespaces")
	case os.IsPermission(err):
		t.Skip("unable to test user namespaces due to permissions")
	default:
		t.Fatalf("Failed to stat /proc/self/ns/user: %v", err)
	}
}
|
|
|
|
|
|
2019-06-06 18:07:13 +08:00
|
|
|
// skipUnprivilegedUserClone skips the calling test if the sysctl that
// prevents unprivileged users from creating user namespaces is enabled.
// A missing sysctl file means there is no such restriction to check.
func skipUnprivilegedUserClone(t *testing.T) {
	contents, readErr := os.ReadFile("/proc/sys/kernel/unprivileged_userns_clone")
	if os.IsNotExist(readErr) {
		// This file is only available in some Debian/Ubuntu kernels.
		return
	}
	// Anything other than a readable, non-empty value not starting with '0'
	// is treated as "prohibited".
	allowed := readErr == nil && len(contents) >= 1 && contents[0] != '0'
	if !allowed {
		t.Skip("kernel prohibits user namespace in unprivileged process")
	}
}
|
|
|
|
|
|
2015-11-26 11:47:32 +13:00
|
|
|
// isChrooted reports whether we appear to be in a chroot. The heuristic is
// that the inode of / differs from 2 (there is no better test available to
// non-root on linux). Failing to stat / is fatal for the calling test.
func isChrooted(t *testing.T) bool {
	info, err := os.Stat("/")
	if err != nil {
		t.Fatalf("cannot stat /: %v", err)
	}
	const rootIno = 2
	st := info.Sys().(*syscall.Stat_t)
	return st.Ino != rootIno
}
|
|
|
|
|
|
2016-05-27 15:02:31 -07:00
|
|
|
func checkUserNS(t *testing.T) {
|
2017-08-23 11:49:22 -07:00
|
|
|
skipInContainer(t)
|
2019-09-17 09:05:02 +02:00
|
|
|
skipNoUserNamespaces(t)
|
2015-11-26 11:47:32 +13:00
|
|
|
if isChrooted(t) {
|
|
|
|
|
// create_user_ns in the kernel (see
|
|
|
|
|
// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/kernel/user_namespace.c)
|
|
|
|
|
// forbids the creation of user namespaces when chrooted.
|
|
|
|
|
t.Skip("cannot create user namespaces when chrooted")
|
|
|
|
|
}
|
2015-08-31 08:41:43 -07:00
|
|
|
// On some systems, there is a sysctl setting.
|
|
|
|
|
if os.Getuid() != 0 {
|
2019-06-06 18:07:13 +08:00
|
|
|
skipUnprivilegedUserClone(t)
|
2015-08-31 08:41:43 -07:00
|
|
|
}
|
2017-07-17 13:51:37 -04:00
|
|
|
// On Centos 7 make sure they set the kernel parameter user_namespace=1
|
|
|
|
|
// See issue 16283 and 20796.
|
|
|
|
|
if _, err := os.Stat("/sys/module/user_namespace/parameters/enable"); err == nil {
|
2020-10-29 14:17:47 -04:00
|
|
|
buf, _ := os.ReadFile("/sys/module/user_namespace/parameters/enabled")
|
2017-07-17 13:51:37 -04:00
|
|
|
if !strings.HasPrefix(string(buf), "Y") {
|
|
|
|
|
t.Skip("kernel doesn't support user namespaces")
|
|
|
|
|
}
|
|
|
|
|
}
|
2018-07-17 22:59:35 +00:00
|
|
|
|
|
|
|
|
// On Centos 7.5+, user namespaces are disabled if user.max_user_namespaces = 0
|
|
|
|
|
if _, err := os.Stat("/proc/sys/user/max_user_namespaces"); err == nil {
|
2020-10-29 14:17:47 -04:00
|
|
|
buf, errRead := os.ReadFile("/proc/sys/user/max_user_namespaces")
|
2018-07-17 22:59:35 +00:00
|
|
|
if errRead == nil && buf[0] == '0' {
|
|
|
|
|
t.Skip("kernel doesn't support user namespaces")
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-05-27 15:02:31 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func whoamiCmd(t *testing.T, uid, gid int, setgroups bool) *exec.Cmd {
|
|
|
|
|
checkUserNS(t)
|
2015-06-03 10:50:39 -07:00
|
|
|
cmd := exec.Command("whoami")
|
|
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
|
Cloneflags: syscall.CLONE_NEWUSER,
|
|
|
|
|
UidMappings: []syscall.SysProcIDMap{
|
|
|
|
|
{ContainerID: 0, HostID: uid, Size: 1},
|
|
|
|
|
},
|
|
|
|
|
GidMappings: []syscall.SysProcIDMap{
|
2015-06-15 11:35:56 -07:00
|
|
|
{ContainerID: 0, HostID: gid, Size: 1},
|
2015-06-03 10:50:39 -07:00
|
|
|
},
|
|
|
|
|
GidMappingsEnableSetgroups: setgroups,
|
|
|
|
|
}
|
|
|
|
|
return cmd
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-15 11:35:56 -07:00
|
|
|
func testNEWUSERRemap(t *testing.T, uid, gid int, setgroups bool) {
|
|
|
|
|
cmd := whoamiCmd(t, uid, gid, setgroups)
|
2015-06-03 10:50:39 -07:00
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
|
|
|
|
}
|
|
|
|
|
sout := strings.TrimSpace(string(out))
|
|
|
|
|
want := "root"
|
|
|
|
|
if sout != want {
|
|
|
|
|
t.Fatalf("whoami = %q; want %q", out, want)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestCloneNEWUSERAndRemapRootDisableSetgroups(t *testing.T) {
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("skipping root only test")
|
|
|
|
|
}
|
2015-06-15 11:35:56 -07:00
|
|
|
testNEWUSERRemap(t, 0, 0, false)
|
2015-06-03 10:50:39 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestCloneNEWUSERAndRemapRootEnableSetgroups(t *testing.T) {
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("skipping root only test")
|
|
|
|
|
}
|
2017-01-17 16:16:42 +09:00
|
|
|
testNEWUSERRemap(t, 0, 0, true)
|
2015-06-03 10:50:39 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestCloneNEWUSERAndRemapNoRootDisableSetgroups(t *testing.T) {
|
|
|
|
|
if os.Getuid() == 0 {
|
|
|
|
|
t.Skip("skipping unprivileged user only test")
|
|
|
|
|
}
|
2015-06-15 11:35:56 -07:00
|
|
|
testNEWUSERRemap(t, os.Getuid(), os.Getgid(), false)
|
2015-06-03 10:50:39 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestCloneNEWUSERAndRemapNoRootSetgroupsEnableSetgroups(t *testing.T) {
|
|
|
|
|
if os.Getuid() == 0 {
|
|
|
|
|
t.Skip("skipping unprivileged user only test")
|
|
|
|
|
}
|
2015-06-15 11:35:56 -07:00
|
|
|
cmd := whoamiCmd(t, os.Getuid(), os.Getgid(), true)
|
2015-06-03 10:50:39 -07:00
|
|
|
err := cmd.Run()
|
|
|
|
|
if err == nil {
|
|
|
|
|
t.Skip("probably old kernel without security fix")
|
|
|
|
|
}
|
2015-06-19 13:48:06 -07:00
|
|
|
if !os.IsPermission(err) {
|
2015-06-03 10:50:39 -07:00
|
|
|
t.Fatalf("Unprivileged gid_map rewriting with GidMappingsEnableSetgroups must fail")
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-08-26 20:45:28 -07:00
|
|
|
|
|
|
|
|
func TestEmptyCredGroupsDisableSetgroups(t *testing.T) {
|
|
|
|
|
cmd := whoamiCmd(t, os.Getuid(), os.Getgid(), false)
|
|
|
|
|
cmd.SysProcAttr.Credential = &syscall.Credential{}
|
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-05-18 18:47:24 -07:00
|
|
|
|
|
|
|
|
func TestUnshare(t *testing.T) {
|
2017-08-23 11:49:22 -07:00
|
|
|
skipInContainer(t)
|
2016-05-18 18:47:24 -07:00
|
|
|
// Make sure we are running as root so we have permissions to use unshare
|
|
|
|
|
// and create a network namespace.
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace")
|
|
|
|
|
}
|
|
|
|
|
|
2016-06-02 17:17:02 +09:00
|
|
|
path := "/proc/net/dev"
|
|
|
|
|
if _, err := os.Stat(path); err != nil {
|
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
|
t.Skip("kernel doesn't support proc filesystem")
|
|
|
|
|
}
|
|
|
|
|
if os.IsPermission(err) {
|
|
|
|
|
t.Skip("unable to test proc filesystem due to permissions")
|
|
|
|
|
}
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
2016-06-14 15:33:15 -04:00
|
|
|
if _, err := os.Stat("/proc/self/ns/net"); err != nil {
|
|
|
|
|
if os.IsNotExist(err) {
|
|
|
|
|
t.Skip("kernel doesn't support net namespace")
|
|
|
|
|
}
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
2016-06-02 17:17:02 +09:00
|
|
|
|
2020-10-29 14:17:47 -04:00
|
|
|
orig, err := os.ReadFile(path)
|
2016-10-05 14:37:25 -04:00
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
origLines := strings.Split(strings.TrimSpace(string(orig)), "\n")
|
|
|
|
|
|
2016-06-02 17:17:02 +09:00
|
|
|
cmd := exec.Command("cat", path)
|
2016-05-18 18:47:24 -07:00
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
2016-05-31 19:44:48 -07:00
|
|
|
Unshareflags: syscall.CLONE_NEWNET,
|
2016-05-18 18:47:24 -07:00
|
|
|
}
|
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
2017-07-14 19:02:05 +00:00
|
|
|
if strings.Contains(err.Error(), "operation not permitted") {
|
|
|
|
|
// Issue 17206: despite all the checks above,
|
|
|
|
|
// this still reportedly fails for some users.
|
|
|
|
|
// (older kernels?). Just skip.
|
|
|
|
|
t.Skip("skipping due to permission error")
|
|
|
|
|
}
|
2016-05-18 18:47:24 -07:00
|
|
|
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check there is only the local network interface
|
|
|
|
|
sout := strings.TrimSpace(string(out))
|
2016-05-19 22:26:01 -07:00
|
|
|
if !strings.Contains(sout, "lo:") {
|
2016-05-18 18:47:24 -07:00
|
|
|
t.Fatalf("Expected lo network interface to exist, got %s", sout)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
lines := strings.Split(sout, "\n")
|
2016-10-05 14:37:25 -04:00
|
|
|
if len(lines) >= len(origLines) {
|
|
|
|
|
t.Fatalf("Got %d lines of output, want <%d", len(lines), len(origLines))
|
2016-05-18 18:47:24 -07:00
|
|
|
}
|
|
|
|
|
}
|
2016-05-27 15:02:31 -07:00
|
|
|
|
|
|
|
|
// TestGroupCleanup runs "id" as root with a Credential of uid 0 / gid 0 and
// checks that the output starts with the expected uid/gid prefix.
func TestGroupCleanup(t *testing.T) {
	if os.Getuid() != 0 {
		t.Skip("we need root for credential")
	}

	rootCred := &syscall.Credential{Uid: 0, Gid: 0}
	idCmd := exec.Command("id")
	idCmd.SysProcAttr = &syscall.SysProcAttr{Credential: rootCred}

	out, err := idCmd.CombinedOutput()
	if err != nil {
		t.Fatalf("Cmd failed with err %v, output: %s", err, out)
	}
	strOut := strings.TrimSpace(string(out))
	t.Logf("id: %s", strOut)

	// Just check prefix because some distros reportedly output a
	// context parameter; see https://golang.org/issue/16224.
	// Alpine does not output groups; see https://golang.org/issue/19938.
	const expected = "uid=0(root) gid=0(root)"
	if strings.HasPrefix(strOut, expected) {
		return
	}
	t.Errorf("expected prefix: %q", expected)
}
|
|
|
|
|
|
|
|
|
|
func TestGroupCleanupUserNamespace(t *testing.T) {
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("we need root for credential")
|
|
|
|
|
}
|
|
|
|
|
checkUserNS(t)
|
|
|
|
|
cmd := exec.Command("id")
|
|
|
|
|
uid, gid := os.Getuid(), os.Getgid()
|
|
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
|
Cloneflags: syscall.CLONE_NEWUSER,
|
|
|
|
|
Credential: &syscall.Credential{
|
|
|
|
|
Uid: uint32(uid),
|
|
|
|
|
Gid: uint32(gid),
|
|
|
|
|
},
|
|
|
|
|
UidMappings: []syscall.SysProcIDMap{
|
|
|
|
|
{ContainerID: 0, HostID: uid, Size: 1},
|
|
|
|
|
},
|
|
|
|
|
GidMappings: []syscall.SysProcIDMap{
|
|
|
|
|
{ContainerID: 0, HostID: gid, Size: 1},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
|
|
|
|
}
|
|
|
|
|
strOut := strings.TrimSpace(string(out))
|
2022-03-31 16:51:32 -04:00
|
|
|
t.Logf("id: %s", strOut)
|
2016-06-30 08:22:27 -07:00
|
|
|
|
2022-03-31 16:51:32 -04:00
|
|
|
// As in TestGroupCleanup, just check prefix.
|
|
|
|
|
// The actual groups and contexts seem to vary from one distro to the next.
|
|
|
|
|
expected := "uid=0(root) gid=0(root) groups=0(root)"
|
|
|
|
|
if !strings.HasPrefix(strOut, expected) {
|
|
|
|
|
t.Errorf("expected prefix: %q", expected)
|
2016-05-27 15:02:31 -07:00
|
|
|
}
|
|
|
|
|
}
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWNS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
|
|
|
|
|
// TestUnshareMountNameSpaceHelper isn't a real test. It's used as a helper
// process for TestUnshareMountNameSpace and TestUnshareMountNameSpaceChroot.
//
// It does nothing unless GO_WANT_HELPER_PROCESS=1. In helper mode it mounts
// a proc filesystem on the directory given as the first non-flag argument
// and exits 0; on any failure it writes a message to stderr and exits 2.
func TestUnshareMountNameSpaceHelper(*testing.T) {
	if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
		return
	}
	defer os.Exit(0)
	args := flag.Args()
	if len(args) == 0 {
		// Report a clear error instead of panicking on args[0].
		fmt.Fprintf(os.Stderr, "unshare: missing mount point argument in %v", os.Args)
		os.Exit(2)
	}
	if err := syscall.Mount("none", args[0], "proc", 0, ""); err != nil {
		fmt.Fprintf(os.Stderr, "unshare: mount %v failed: %v", os.Args, err)
		os.Exit(2)
	}
}
|
|
|
|
|
|
|
|
|
|
// Test for Issue 38471: unshare fails because systemd has forced / to be shared
|
|
|
|
|
func TestUnshareMountNameSpace(t *testing.T) {
|
2017-08-23 11:49:22 -07:00
|
|
|
skipInContainer(t)
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
// Make sure we are running as root so we have permissions to use unshare
|
|
|
|
|
// and create a network namespace.
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace")
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-29 14:17:47 -04:00
|
|
|
d, err := os.MkdirTemp("", "unshare")
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("tempdir: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cmd := exec.Command(os.Args[0], "-test.run=TestUnshareMountNameSpaceHelper", d)
|
2020-05-11 11:01:16 -07:00
|
|
|
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{Unshareflags: syscall.CLONE_NEWNS}
|
|
|
|
|
|
|
|
|
|
o, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
2017-03-30 18:00:10 -07:00
|
|
|
if strings.Contains(err.Error(), ": permission denied") {
|
|
|
|
|
t.Skipf("Skipping test (golang.org/issue/19698); unshare failed due to permissions: %s, %v", o, err)
|
|
|
|
|
}
|
|
|
|
|
t.Fatalf("unshare failed: %s, %v", o, err)
|
os/exec: handle Unshareflags with CLONE_NEWNS
In some newer Linux distros, systemd forces
all mount namespaces to be shared, starting
at /. This disables the CLONE_NEWNS
flag in unshare(2) and clone(2).
While this problem is most commonly seen
on systems with systemd, it can happen anywhere,
due to how Linux namespaces now work.
Hence, to create a private mount namespace,
it is not sufficient to just set
CLONE_NEWS; you have to call mount(2) to change
the behavior of namespaces, i.e.
mount("none", "/", NULL, MS_REC|MS_PRIVATE, NULL)
This is tested and working and we can now correctly
start child process with private namespaces on Linux
distros that use systemd.
The new test works correctly on Ubuntu 16.04.2 LTS.
It fails if I comment out the new Mount, and
succeeds otherwise. In each case it correctly
cleans up after itself.
Fixes #19661
Change-Id: I52240b59628e3772b529d9bbef7166606b0c157d
Reviewed-on: https://go-review.googlesource.com/38471
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-03-22 14:40:55 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// How do we tell if the namespace was really unshared? It turns out
|
|
|
|
|
// to be simple: just try to remove the directory. If it's still mounted
|
|
|
|
|
// on the rm will fail with EBUSY. Then we have some cleanup to do:
|
|
|
|
|
// we must unmount it, then try to remove it again.
|
|
|
|
|
|
|
|
|
|
if err := os.Remove(d); err != nil {
|
|
|
|
|
t.Errorf("rmdir failed on %v: %v", d, err)
|
|
|
|
|
if err := syscall.Unmount(d, syscall.MNT_FORCE); err != nil {
|
|
|
|
|
t.Errorf("Can't unmount %v: %v", d, err)
|
|
|
|
|
}
|
|
|
|
|
if err := os.Remove(d); err != nil {
|
|
|
|
|
t.Errorf("rmdir after unmount failed on %v: %v", d, err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-04-24 16:09:24 -07:00
|
|
|
|
|
|
|
|
// Test for Issue 20103: unshare fails when chroot is used
|
|
|
|
|
func TestUnshareMountNameSpaceChroot(t *testing.T) {
|
2017-08-23 11:49:22 -07:00
|
|
|
skipInContainer(t)
|
2017-04-24 16:09:24 -07:00
|
|
|
// Make sure we are running as root so we have permissions to use unshare
|
|
|
|
|
// and create a network namespace.
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace")
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-29 14:17:47 -04:00
|
|
|
d, err := os.MkdirTemp("", "unshare")
|
2017-04-24 16:09:24 -07:00
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("tempdir: %v", err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Since we are doing a chroot, we need the binary there,
|
|
|
|
|
// and it must be statically linked.
|
|
|
|
|
x := filepath.Join(d, "syscall.test")
|
2017-07-12 21:31:30 +00:00
|
|
|
cmd := exec.Command(testenv.GoToolPath(t), "test", "-c", "-o", x, "syscall")
|
2017-04-24 16:09:24 -07:00
|
|
|
cmd.Env = append(os.Environ(), "CGO_ENABLED=0")
|
|
|
|
|
if o, err := cmd.CombinedOutput(); err != nil {
|
|
|
|
|
t.Fatalf("Build of syscall in chroot failed, output %v, err %v", o, err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cmd = exec.Command("/syscall.test", "-test.run=TestUnshareMountNameSpaceHelper", "/")
|
2020-05-11 11:01:16 -07:00
|
|
|
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
2017-04-24 16:09:24 -07:00
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{Chroot: d, Unshareflags: syscall.CLONE_NEWNS}
|
|
|
|
|
|
|
|
|
|
o, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
|
|
|
|
if strings.Contains(err.Error(), ": permission denied") {
|
|
|
|
|
t.Skipf("Skipping test (golang.org/issue/19698); unshare failed due to permissions: %s, %v", o, err)
|
|
|
|
|
}
|
|
|
|
|
t.Fatalf("unshare failed: %s, %v", o, err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// How do we tell if the namespace was really unshared? It turns out
|
|
|
|
|
// to be simple: just try to remove the executable. If it's still mounted
|
|
|
|
|
// on, the rm will fail. Then we have some cleanup to do:
|
|
|
|
|
// we must force unmount it, then try to remove it again.
|
|
|
|
|
|
|
|
|
|
if err := os.Remove(x); err != nil {
|
|
|
|
|
t.Errorf("rm failed on %v: %v", x, err)
|
|
|
|
|
if err := syscall.Unmount(d, syscall.MNT_FORCE); err != nil {
|
|
|
|
|
t.Fatalf("Can't unmount %v: %v", d, err)
|
|
|
|
|
}
|
|
|
|
|
if err := os.Remove(x); err != nil {
|
|
|
|
|
t.Fatalf("rm failed on %v: %v", x, err)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := os.Remove(d); err != nil {
|
|
|
|
|
t.Errorf("rmdir failed on %v: %v", d, err)
|
|
|
|
|
}
|
|
|
|
|
}
|
2017-05-17 02:05:32 -07:00
|
|
|
|
2019-01-17 16:53:41 +01:00
|
|
|
// TestUnshareUidGidMappingHelper isn't a real test. It does nothing unless
// GO_WANT_HELPER_PROCESS=1; in helper mode it chroots into os.TempDir(),
// exiting 0 on success, or printing the error to stderr and exiting 2.
func TestUnshareUidGidMappingHelper(*testing.T) {
	if mode := os.Getenv("GO_WANT_HELPER_PROCESS"); mode != "1" {
		return
	}
	defer os.Exit(0)
	chrootErr := syscall.Chroot(os.TempDir())
	if chrootErr == nil {
		return
	}
	fmt.Fprintln(os.Stderr, chrootErr)
	os.Exit(2)
}
|
|
|
|
|
|
|
|
|
|
// Test for Issue 29789: unshare fails when uid/gid mapping is specified
|
|
|
|
|
func TestUnshareUidGidMapping(t *testing.T) {
|
|
|
|
|
if os.Getuid() == 0 {
|
|
|
|
|
t.Skip("test exercises unprivileged user namespace, fails with privileges")
|
|
|
|
|
}
|
|
|
|
|
checkUserNS(t)
|
|
|
|
|
cmd := exec.Command(os.Args[0], "-test.run=TestUnshareUidGidMappingHelper")
|
|
|
|
|
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
|
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
|
Unshareflags: syscall.CLONE_NEWNS | syscall.CLONE_NEWUSER,
|
|
|
|
|
GidMappingsEnableSetgroups: false,
|
|
|
|
|
UidMappings: []syscall.SysProcIDMap{
|
|
|
|
|
{
|
|
|
|
|
ContainerID: 0,
|
|
|
|
|
HostID: syscall.Getuid(),
|
|
|
|
|
Size: 1,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
GidMappings: []syscall.SysProcIDMap{
|
|
|
|
|
{
|
|
|
|
|
ContainerID: 0,
|
|
|
|
|
HostID: syscall.Getgid(),
|
|
|
|
|
Size: 1,
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
syscall: add CgroupFD support for ForkExec on Linux
Implement CLONE_INTO_CGROUP feature, allowing to put a child in a
specified cgroup in a clean and simple way. Note that the feature only
works for cgroup v2, and requires Linux kernel 5.7 or newer.
Using the feature requires a new syscall, clone3. Currently this is the
only reason to use clone3, but the code is structured in a way so that
other cases may be easily added in the future.
Add a test case.
While at it, try to simplify the syscall calling code in
forkAndExecInChild1, which became complicated over time because:
1. It was using either rawVforkSyscall or RawSyscall6 depending on
whether CLONE_NEWUSER was set.
2. On Linux/s390, the first two arguments to clone(2) system call are
swapped (which deserved a mention in Linux ABI hall of shame). It
was worked around in rawVforkSyscall on s390, but had to be
implemented via a switch/case when using RawSyscall6, making the code
less clear.
Let's
- modify rawVforkSyscall to have two arguments (which is also required
for clone3);
- remove the arguments workaround from s390 asm, instead implementing
arguments swap in the caller (which still looks ugly but at least
it's done once and is clearly documented now);
- use rawVforkSyscall for all cases (since it is essentially similar to
RawSyscall6, except for having less parameters, not returning r2, and
saving/restoring the return address before/after syscall on 386 and
amd64).
Updates #51246.
Change-Id: Ifcd418ebead9257177338ffbcccd0bdecb94474e
Reviewed-on: https://go-review.googlesource.com/c/go/+/417695
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2022-07-14 21:18:15 -07:00
|
|
|
// prepareCgroupFD creates a temporary sub-cgroup below the cgroup of the
// current process and opens it with O_PATH.
//
// It returns the opened file descriptor and the base name of the new
// sub-cgroup prefixed with "/". Both the descriptor and the directory are
// released through t.Cleanup.
//
// The calling test is skipped when the environment cannot support the test:
// /proc/self/cgroup is unreadable or does not look like a pure cgroup v2
// hierarchy, clone3 with CLONE_INTO_CGROUP is unavailable, or the sub-cgroup
// cannot be created for environmental reasons. Any other error is fatal.
func prepareCgroupFD(t *testing.T) (int, string) {
	t.Helper()

	const O_PATH = 0x200000 // Same for all architectures, but for some reason not defined in syscall for 386||amd64.

	// Requires cgroup v2.
	const prefix = "/sys/fs/cgroup"
	selfCg, err := os.ReadFile("/proc/self/cgroup")
	if err != nil {
		if os.IsNotExist(err) || os.IsPermission(err) {
			t.Skip(err)
		}
		t.Fatal(err)
	}

	// Expect a single line like this:
	//	0::/user.slice/user-1000.slice/user@1000.service/app.slice/vte-spawn-891992a2-efbb-4f28-aedb-b24f9e706770.scope
	// Otherwise it's either cgroup v1 or a hybrid hierarchy.
	if bytes.Count(selfCg, []byte("\n")) > 1 {
		t.Skip("cgroup v2 not available")
	}
	cg := bytes.TrimPrefix(selfCg, []byte("0::"))
	if len(cg) == len(selfCg) { // No prefix found.
		t.Skipf("cgroup v2 not available (/proc/self/cgroup contents: %q)", selfCg)
	}

	// Need clone3 with CLONE_INTO_CGROUP support. The binary does not exist,
	// so the call always fails; only the kind of failure is of interest.
	_, err = syscall.ForkExec("non-existent binary", nil, &syscall.ProcAttr{
		Sys: &syscall.SysProcAttr{
			UseCgroupFD: true,
			CgroupFD:    -1,
		},
	})
	// EPERM can be returned if clone3 is not enabled by seccomp.
	if err == syscall.ENOSYS || err == syscall.EPERM {
		t.Skipf("clone3 with CLONE_INTO_CGROUP not available: %v", err)
	}

	// Need an ability to create a sub-cgroup.
	subCgroup, err := os.MkdirTemp(prefix+string(bytes.TrimSpace(cg)), "subcg-")
	if err != nil {
		// Besides a plain permission error, two more environmental failures
		// must not fail the test:
		//   - EROFS: the cgroup filesystem is mounted read-only;
		//   - not-exist: the cgroup directory is not visible under prefix
		//     (e.g. cgroupfs is not mounted there).
		readOnlyFS := false
		if pe, ok := err.(*os.PathError); ok {
			readOnlyFS = pe.Err == syscall.EROFS
		}
		if os.IsPermission(err) || os.IsNotExist(err) || readOnlyFS {
			t.Skip(err)
		}
		t.Fatal(err)
	}
	t.Cleanup(func() { syscall.Rmdir(subCgroup) })

	cgroupFD, err := syscall.Open(subCgroup, O_PATH, 0)
	if err != nil {
		t.Fatal(&os.PathError{Op: "open", Path: subCgroup, Err: err})
	}
	// Cleanups run last-in first-out, so the descriptor is closed before
	// the directory is removed.
	t.Cleanup(func() { syscall.Close(cgroupFD) })

	return cgroupFD, "/" + path.Base(subCgroup)
}
|
|
|
|
|
|
|
|
|
|
func TestUseCgroupFD(t *testing.T) {
|
|
|
|
|
fd, suffix := prepareCgroupFD(t)
|
|
|
|
|
|
|
|
|
|
cmd := exec.Command(os.Args[0], "-test.run=TestUseCgroupFDHelper")
|
|
|
|
|
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
|
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
|
UseCgroupFD: true,
|
|
|
|
|
CgroupFD: fd,
|
|
|
|
|
}
|
|
|
|
|
out, err := cmd.CombinedOutput()
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
|
|
|
|
}
|
|
|
|
|
// NB: this wouldn't work with cgroupns.
|
|
|
|
|
if !bytes.HasSuffix(bytes.TrimSpace(out), []byte(suffix)) {
|
|
|
|
|
t.Fatalf("got: %q, want: a line that ends with %q", out, suffix)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TestUseCgroupFDHelper isn't a real test. It is the helper process started
// by TestUseCgroupFD: when GO_WANT_HELPER_PROCESS is "1" it prints the
// contents of /proc/self/cgroup to stdout and exits; otherwise it does nothing.
func TestUseCgroupFDHelper(*testing.T) {
	if mode := os.Getenv("GO_WANT_HELPER_PROCESS"); mode != "1" {
		return
	}
	defer os.Exit(0)

	// Read and print own cgroup path.
	contents, readErr := os.ReadFile("/proc/self/cgroup")
	if readErr != nil {
		fmt.Fprintln(os.Stderr, readErr)
		os.Exit(2)
	}
	fmt.Printf("%s", contents)
}
|
|
|
|
|
|
2017-05-17 02:05:32 -07:00
|
|
|
// Types used by getCaps to exchange data with the capget system call.
type (
	// capHeader is passed as the first argument of capget.
	capHeader struct {
		version uint32 // capability version; see getCaps
		pid     int32  // left at zero by getCaps
	}

	// capData holds one 32-bit slice of each capability set.
	capData struct {
		effective, permitted, inheritable uint32
	}

	// caps bundles the header with two capData entries. The tests look up
	// CAP_SYS_TIME (25) in data[0] and CAP_SYSLOG (34) in data[1], using
	// the capability number modulo 32 as the bit index.
	caps struct {
		hdr  capHeader
		data [2]capData
	}
)

// Capability numbers exercised by the ambient capabilities tests.
const (
	CAP_SYS_TIME = 25
	CAP_SYSLOG   = 34
)
|
|
|
|
|
|
|
|
|
|
func getCaps() (caps, error) {
|
|
|
|
|
var c caps
|
|
|
|
|
|
|
|
|
|
// Get capability version
|
|
|
|
|
if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(nil)), 0); errno != 0 {
|
|
|
|
|
return c, fmt.Errorf("SYS_CAPGET: %v", errno)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get current capabilities
|
|
|
|
|
if _, _, errno := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(&c.hdr)), uintptr(unsafe.Pointer(&c.data[0])), 0); errno != 0 {
|
|
|
|
|
return c, fmt.Errorf("SYS_CAPGET: %v", errno)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return c, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// mustSupportAmbientCaps skips the calling test when the kernel release
// reported by uname(2) starts with a version prefix older than 4.3, the
// version the ambient capabilities tests require. A failing uname call is
// fatal.
func mustSupportAmbientCaps(t *testing.T) {
	var uname syscall.Utsname
	if err := syscall.Uname(&uname); err != nil {
		t.Fatalf("Uname: %v", err)
	}

	// Release is a fixed-size, NUL-padded array whose element type is not
	// byte on every architecture, so convert it element by element.
	buf := make([]byte, len(uname.Release))
	for i, b := range uname.Release {
		buf[i] = byte(b)
	}
	ver, _, _ := strings.Cut(string(buf), "\x00")

	// Every 2.x and 3.x kernel, plus 4.0 through 4.2, predates 4.3.
	// The trailing dots keep "4.1." from matching 4.10 and later.
	for _, old := range []string{"2.", "3.", "4.0.", "4.1.", "4.2."} {
		if strings.HasPrefix(ver, old) {
			t.Skipf("kernel version %q predates required 4.3; skipping test", ver)
		}
	}
}
|
|
|
|
|
|
|
|
|
|
// TestAmbientCapsHelper isn't a real test. It's used as a helper process for
|
|
|
|
|
// TestAmbientCaps.
|
|
|
|
|
func TestAmbientCapsHelper(*testing.T) {
|
|
|
|
|
if os.Getenv("GO_WANT_HELPER_PROCESS") != "1" {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
defer os.Exit(0)
|
|
|
|
|
|
|
|
|
|
caps, err := getCaps()
|
|
|
|
|
if err != nil {
|
|
|
|
|
fmt.Fprintln(os.Stderr, err)
|
|
|
|
|
os.Exit(2)
|
|
|
|
|
}
|
|
|
|
|
if caps.data[0].effective&(1<<uint(CAP_SYS_TIME)) == 0 {
|
|
|
|
|
fmt.Fprintln(os.Stderr, "CAP_SYS_TIME unexpectedly not in the effective capability mask")
|
|
|
|
|
os.Exit(2)
|
|
|
|
|
}
|
2019-01-07 10:18:42 -08:00
|
|
|
if caps.data[1].effective&(1<<uint(CAP_SYSLOG&31)) == 0 {
|
|
|
|
|
fmt.Fprintln(os.Stderr, "CAP_SYSLOG unexpectedly not in the effective capability mask")
|
|
|
|
|
os.Exit(2)
|
|
|
|
|
}
|
2017-05-17 02:05:32 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestAmbientCaps(t *testing.T) {
|
|
|
|
|
// Make sure we are running as root so we have permissions to use unshare
|
|
|
|
|
// and create a network namespace.
|
|
|
|
|
if os.Getuid() != 0 {
|
|
|
|
|
t.Skip("kernel prohibits unshare in unprivileged process, unless using user namespace")
|
|
|
|
|
}
|
2019-01-07 10:18:42 -08:00
|
|
|
|
|
|
|
|
testAmbientCaps(t, false)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func TestAmbientCapsUserns(t *testing.T) {
|
2019-09-19 17:16:59 +02:00
|
|
|
checkUserNS(t)
|
2019-01-07 10:18:42 -08:00
|
|
|
testAmbientCaps(t, true)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func testAmbientCaps(t *testing.T, userns bool) {
|
|
|
|
|
skipInContainer(t)
|
2017-05-17 02:05:32 -07:00
|
|
|
mustSupportAmbientCaps(t)
|
|
|
|
|
|
2019-06-06 18:07:13 +08:00
|
|
|
skipUnprivilegedUserClone(t)
|
2019-03-09 18:01:26 +01:00
|
|
|
|
2018-08-29 03:24:13 +00:00
|
|
|
// skip on android, due to lack of lookup support
|
|
|
|
|
if runtime.GOOS == "android" {
|
|
|
|
|
t.Skip("skipping test on android; see Issue 27327")
|
|
|
|
|
}
|
|
|
|
|
|
2017-05-17 02:05:32 -07:00
|
|
|
u, err := user.Lookup("nobody")
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
uid, err := strconv.ParseInt(u.Uid, 0, 32)
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
gid, err := strconv.ParseInt(u.Gid, 0, 32)
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Copy the test binary to a temporary location which is readable by nobody.
|
2020-10-29 14:17:47 -04:00
|
|
|
f, err := os.CreateTemp("", "gotest")
|
2017-05-17 02:05:32 -07:00
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
defer os.Remove(f.Name())
|
|
|
|
|
defer f.Close()
|
|
|
|
|
e, err := os.Open(os.Args[0])
|
|
|
|
|
if err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
defer e.Close()
|
|
|
|
|
if _, err := io.Copy(f, e); err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
if err := f.Chmod(0755); err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
if err := f.Close(); err != nil {
|
|
|
|
|
t.Fatal(err)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cmd := exec.Command(f.Name(), "-test.run=TestAmbientCapsHelper")
|
2020-05-11 11:01:16 -07:00
|
|
|
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
2017-05-17 02:05:32 -07:00
|
|
|
cmd.Stdout = os.Stdout
|
|
|
|
|
cmd.Stderr = os.Stderr
|
|
|
|
|
cmd.SysProcAttr = &syscall.SysProcAttr{
|
|
|
|
|
Credential: &syscall.Credential{
|
|
|
|
|
Uid: uint32(uid),
|
|
|
|
|
Gid: uint32(gid),
|
|
|
|
|
},
|
2019-01-07 10:18:42 -08:00
|
|
|
AmbientCaps: []uintptr{CAP_SYS_TIME, CAP_SYSLOG},
|
|
|
|
|
}
|
|
|
|
|
if userns {
|
|
|
|
|
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWUSER
|
|
|
|
|
const nobody = 65534
|
|
|
|
|
uid := os.Getuid()
|
|
|
|
|
gid := os.Getgid()
|
|
|
|
|
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{{
|
|
|
|
|
ContainerID: int(nobody),
|
|
|
|
|
HostID: int(uid),
|
|
|
|
|
Size: int(1),
|
|
|
|
|
}}
|
|
|
|
|
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{{
|
|
|
|
|
ContainerID: int(nobody),
|
|
|
|
|
HostID: int(gid),
|
|
|
|
|
Size: int(1),
|
|
|
|
|
}}
|
|
|
|
|
|
|
|
|
|
// Set credentials to run as user and group nobody.
|
|
|
|
|
cmd.SysProcAttr.Credential = &syscall.Credential{
|
|
|
|
|
Uid: nobody,
|
|
|
|
|
Gid: nobody,
|
|
|
|
|
}
|
2017-05-17 02:05:32 -07:00
|
|
|
}
|
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
|
|
|
t.Fatal(err.Error())
|
|
|
|
|
}
|
|
|
|
|
}
|