mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
syscall: add CgroupFD support for ForkExec on Linux
Implement CLONE_INTO_CGROUP feature, which allows placing a child in a specified cgroup in a clean and simple way. Note that the feature only works for cgroup v2, and requires Linux kernel 5.7 or newer. Using the feature requires a new syscall, clone3. Currently this is the only reason to use clone3, but the code is structured in a way so that other cases may be easily added in the future. Add a test case. While at it, try to simplify the syscall calling code in forkAndExecInChild1, which became complicated over time because: 1. It was using either rawVforkSyscall or RawSyscall6 depending on whether CLONE_NEWUSER was set. 2. On Linux/s390, the first two arguments to clone(2) system call are swapped (which deserves a mention in the Linux ABI hall of shame). It was worked around in rawVforkSyscall on s390, but had to be implemented via a switch/case when using RawSyscall6, making the code less clear. Let's - modify rawVforkSyscall to have two arguments (which is also required for clone3); - remove the arguments workaround from s390 asm, instead implementing arguments swap in the caller (which still looks ugly but at least it's done once and is clearly documented now); - use rawVforkSyscall for all cases (since it is essentially similar to RawSyscall6, except for having fewer parameters, not returning r2, and saving/restoring the return address before/after syscall on 386 and amd64). Updates #51246. Change-Id: Ifcd418ebead9257177338ffbcccd0bdecb94474e Reviewed-on: https://go-review.googlesource.com/c/go/+/417695 Auto-Submit: Ian Lance Taylor <iant@google.com> Reviewed-by: Michael Knyszek <mknyszek@google.com> Reviewed-by: Ian Lance Taylor <iant@google.com> Run-TryBot: Ian Lance Taylor <iant@google.com> Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com> TryBot-Result: Gopher Robot <gobot@golang.org>
This commit is contained in:
parent
f53b2111e4
commit
bca17d16ca
24 changed files with 228 additions and 99 deletions
|
|
@ -7,6 +7,7 @@
|
|||
package syscall_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"internal/testenv"
|
||||
|
|
@ -14,6 +15,7 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"os/user"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
|
@ -461,6 +463,96 @@ func TestUnshareUidGidMapping(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// prepareCgroupFD creates a temporary sub-cgroup below the cgroup of the
// current process and opens it with O_PATH.
//
// It returns the file descriptor of the new cgroup directory together with
// "/" followed by the directory's base name, which callers can match against
// the tail of a child's /proc/self/cgroup contents. Both the directory and
// the descriptor are released via t.Cleanup.
//
// The calling test is skipped when the environment cannot support it:
// /proc/self/cgroup is missing or unreadable, the hierarchy is not pure
// cgroup v2, clone3 with CLONE_INTO_CGROUP is unavailable, or the sub-cgroup
// cannot be created because of missing permissions or because the cgroup
// directory is not present under /sys/fs/cgroup.
func prepareCgroupFD(t *testing.T) (int, string) {
	t.Helper()

	const O_PATH = 0x200000 // Same for all architectures, but for some reason not defined in syscall for 386||amd64.

	// Requires cgroup v2.
	const prefix = "/sys/fs/cgroup"
	selfCg, err := os.ReadFile("/proc/self/cgroup")
	if err != nil {
		if os.IsNotExist(err) || os.IsPermission(err) {
			t.Skip(err)
		}
		t.Fatal(err)
	}

	// Expect a single line like this:
	//	0::/user.slice/user-1000.slice/user@1000.service/app.slice/vte-spawn-891992a2-efbb-4f28-aedb-b24f9e706770.scope
	// Otherwise it's either cgroup v1 or a hybrid hierarchy.
	if bytes.Count(selfCg, []byte("\n")) > 1 {
		t.Skip("cgroup v2 not available")
	}
	cg := bytes.TrimPrefix(selfCg, []byte("0::"))
	if len(cg) == len(selfCg) { // No prefix found.
		t.Skipf("cgroup v2 not available (/proc/self/cgroup contents: %q)", selfCg)
	}

	// Need clone3 with CLONE_INTO_CGROUP support. The binary deliberately
	// does not exist: only the error returned by the clone attempt matters.
	_, err = syscall.ForkExec("non-existent binary", nil, &syscall.ProcAttr{
		Sys: &syscall.SysProcAttr{
			UseCgroupFD: true,
			CgroupFD:    -1,
		},
	})
	// EPERM can be returned if clone3 is not enabled by seccomp.
	if err == syscall.ENOSYS || err == syscall.EPERM {
		t.Skipf("clone3 with CLONE_INTO_CGROUP not available: %v", err)
	}

	// Need an ability to create a sub-cgroup.
	subCgroup, err := os.MkdirTemp(prefix+string(bytes.TrimSpace(cg)), "subcg-")
	if err != nil {
		// A permission error means we may not create cgroups here; a
		// not-exist error means our cgroup is not visible under prefix
		// (for instance, cgroupfs is not mounted). Neither is a failure
		// of the code under test.
		if os.IsPermission(err) || os.IsNotExist(err) {
			t.Skip(err)
		}
		t.Fatal(err)
	}
	// Best-effort cleanup: errors are deliberately ignored.
	t.Cleanup(func() { syscall.Rmdir(subCgroup) })

	cgroupFD, err := syscall.Open(subCgroup, O_PATH, 0)
	if err != nil {
		t.Fatal(&os.PathError{Op: "open", Path: subCgroup, Err: err})
	}
	t.Cleanup(func() { syscall.Close(cgroupFD) })

	return cgroupFD, "/" + path.Base(subCgroup)
}
|
||||
|
||||
func TestUseCgroupFD(t *testing.T) {
|
||||
fd, suffix := prepareCgroupFD(t)
|
||||
|
||||
cmd := exec.Command(os.Args[0], "-test.run=TestUseCgroupFDHelper")
|
||||
cmd.Env = append(os.Environ(), "GO_WANT_HELPER_PROCESS=1")
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
UseCgroupFD: true,
|
||||
CgroupFD: fd,
|
||||
}
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
t.Fatalf("Cmd failed with err %v, output: %s", err, out)
|
||||
}
|
||||
// NB: this wouldn't work with cgroupns.
|
||||
if !bytes.HasSuffix(bytes.TrimSpace(out), []byte(suffix)) {
|
||||
t.Fatalf("got: %q, want: a line that ends with %q", out, suffix)
|
||||
}
|
||||
}
|
||||
|
||||
// TestUseCgroupFDHelper is the child-process half of TestUseCgroupFD rather
// than a test in its own right. Unless GO_WANT_HELPER_PROCESS is "1" it does
// nothing. Otherwise it writes the contents of /proc/self/cgroup to standard
// output and exits with status 0, or reports the read error on standard
// error and exits with status 2.
func TestUseCgroupFDHelper(*testing.T) {
	if mode := os.Getenv("GO_WANT_HELPER_PROCESS"); mode != "1" {
		return
	}

	// Report the cgroup this process was placed in.
	contents, readErr := os.ReadFile("/proc/self/cgroup")
	if readErr != nil {
		fmt.Fprintln(os.Stderr, readErr)
		os.Exit(2)
	}
	fmt.Print(string(contents))

	// Exit directly so the test framework prints nothing after the path.
	os.Exit(0)
}
|
||||
|
||||
type capHeader struct {
|
||||
version uint32
|
||||
pid int32
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue