diff --git a/src/internal/cgrouptest/cgrouptest_linux.go b/src/internal/cgrouptest/cgrouptest_linux.go index 8437f992f74..ad9599c9383 100644 --- a/src/internal/cgrouptest/cgrouptest_linux.go +++ b/src/internal/cgrouptest/cgrouptest_linux.go @@ -50,9 +50,8 @@ func (c *CgroupV2) SetCPUMax(quota, period int64) error { // // This must not be used in parallel tests, as it affects the entire process. func InCgroupV2(t *testing.T, fn func(*CgroupV2)) { - mount, rel := findCurrent(t) - parent := findOwnedParent(t, mount, rel) - orig := filepath.Join(mount, rel) + orig := findCurrent(t) + parent := findOwnedParent(t, orig) // Make sure the parent allows children to control cpu. b, err := os.ReadFile(filepath.Join(parent, "cgroup.subtree_control")) @@ -93,34 +92,25 @@ func InCgroupV2(t *testing.T, fn func(*CgroupV2)) { fn(c) } -// Returns the mount and relative directory of the current cgroup the process -// is in. -func findCurrent(t *testing.T) (string, string) { +// Returns the filesystem path to the current cgroup the process is in. +func findCurrent(t *testing.T) string { // Find the path to our current CPU cgroup. Currently this package is // only used for CPU cgroup testing, so the distinction of different // controllers doesn't matter. var scratch [cgroup.ParseSize]byte buf := make([]byte, cgroup.PathSize) - n, err := cgroup.FindCPUMountPoint(buf, scratch[:]) + n, ver, err := cgroup.FindCPU(buf, scratch[:]) if err != nil { t.Skipf("cgroup: unable to find current cgroup mount: %v", err) } - mount := string(buf[:n]) - - n, ver, err := cgroup.FindCPURelativePath(buf, scratch[:]) - if err != nil { - t.Skipf("cgroup: unable to find current cgroup path: %v", err) - } if ver != cgroup.V2 { t.Skipf("cgroup: running on cgroup v%d want v2", ver) } - rel := string(buf[1:n]) // The returned path always starts with /, skip it. - rel = filepath.Join(".", rel) // Make sure this isn't empty string at root. - return mount, rel + return string(buf[:n]) } // Returns a parent directory in which we can create our own cgroup subdirectory. -func findOwnedParent(t *testing.T, mount, rel string) string { +func findOwnedParent(t *testing.T, orig string) string { // There are many ways cgroups may be set up on a system. We don't try // to cover all of them, just common ones. // @@ -142,7 +132,7 @@ func findOwnedParent(t *testing.T, mount, rel string) string { // We want to create our own subdirectory that we can migrate into and // then manipulate at will. It is tempting to create a new subdirectory - // inside the current cgroup we are already in, however that will likey + // inside the current cgroup we are already in, however that will likely // not work. cgroup v2 only allows processes to be in leaf cgroups. Our // current cgroup likely contains multiple processes (at least this one // and the cmd/go test runner). If we make a subdirectory and try to @@ -166,27 +156,29 @@ func findOwnedParent(t *testing.T, mount, rel string) string { // is empty. As far as I tell, the only purpose of this is to allow // reorganizing processes into a new set of subdirectories and then // adding controllers once done. - root, err := os.OpenRoot(mount) + var stat syscall.Stat_t + err := syscall.Stat(orig, &stat) if err != nil { - t.Fatalf("error opening cgroup mount root: %v", err) + t.Fatalf("error stating orig cgroup: %v", err) } uid := os.Getuid() var prev string - for rel != "." { - fi, err := root.Stat(rel) + cur := filepath.Dir(orig) + for cur != "/" { + var curStat syscall.Stat_t + err = syscall.Stat(cur, &curStat) if err != nil { t.Fatalf("error stating cgroup path: %v", err) } - st := fi.Sys().(*syscall.Stat_t) - if int(st.Uid) != uid { - // Stop at first directory we don't own. + if int(curStat.Uid) != uid || curStat.Dev != stat.Dev { + // Stop at first directory we don't own or filesystem boundary. break } - prev = rel - rel = filepath.Join(rel, "..") + prev = cur + cur = filepath.Dir(cur) } if prev == "" { @@ -194,7 +186,7 @@ func findOwnedParent(t *testing.T, mount, rel string) string { } // We actually want the last directory where we were the owner. - return filepath.Join(mount, prev) + return prev } // Migrate the current process to the cgroup directory dst. diff --git a/src/internal/runtime/cgroup/cgroup.go b/src/internal/runtime/cgroup/cgroup.go index 68c31fcbc3b..09519af1e10 100644 --- a/src/internal/runtime/cgroup/cgroup.go +++ b/src/internal/runtime/cgroup/cgroup.go @@ -102,21 +102,23 @@ func parseV2Limit(buf []byte) (float64, bool, error) { return float64(quota) / float64(period), true, nil } -// Finds the path of the current process's CPU cgroup relative to the cgroup -// mount and writes it to out. +// Finds the path of the current process's CPU cgroup and writes it to out. // +// fd is a file descriptor for /proc/self/cgroup. // Returns the number of bytes written and the cgroup version (1 or 2). -func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) { +func parseCPUCgroup(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) { // The format of each line is // // hierarchy-ID:controller-list:cgroup-path // // controller-list is comma-separated. - // See man 5 cgroup for more details. // // cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that // is the CPU controller. Otherwise the v2 hierarchy (if any) is the - // CPU controller. + // CPU controller. It is not possible to mount the same controller + // simultaneously under both the v1 and the v2 hierarchies. + // + // See man 7 cgroups for more details. // // hierarchy-ID and controller-list have relatively small maximum // sizes, and the path can be up to _PATH_MAX, so we need a bit more @@ -149,7 +151,7 @@ func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), ou // hierarchy-ID:controller-list:cgroup-path // // controller-list is comma-separated. - // See man 5 cgroup for more details. + // See man 7 cgroups for more details. i := bytealg.IndexByte(line, ':') if i < 0 { return 0, 0, errMalformedFile @@ -167,6 +169,15 @@ func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), ou line = line[i+1:] path := line + if len(path) == 0 || path[0] != '/' { + // We rely on this when composing the full path. + return 0, 0, errMalformedFile + } + if len(path) > len(out) { + // Should not be possible. If we really get a very long cgroup path, + // read /proc/self/cgroup will fail with ENAMETOOLONG. + return 0, 0, errPathTooLong + } if string(hierarchy) == "0" { // v2 hierarchy. @@ -214,9 +225,11 @@ func containsCPU(b []byte) bool { return false } -// Returns the mount point for the cpu cgroup controller (v1 or v2) from -// /proc/self/mountinfo. -func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) { +// Returns the path to the specified cgroup and version with cpu controller +// +// fd is a file descriptor for /proc/self/mountinfo. +// Returns the number of bytes written. +func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out, cgroup []byte, version Version, scratch []byte) (int, error) { // The format of each line is: // // 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue @@ -240,8 +253,13 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt // carriage return. Those are escaped. See Linux show_mountinfo -> // show_path. We must unescape before returning. // - // We return the mount point (5) if the filesystem type (9) is cgroup2, - // or cgroup with "cpu" in the super options (11). + // A mount point matches if the filesystem type (9) is cgroup2, + // or cgroup with "cpu" in the super options (11), + // and the cgroup is in the root (4). If there are multiple matches, + // the first one is selected. + // + // We return full cgroup path, which is the mount point (5) + + // cgroup parameter without the root (4) prefix. // // (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a // small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space. @@ -250,11 +268,7 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt l := newLineReader(fd, scratch, read) - // Bytes written to out. - n := 0 - for { - //incomplete := false err := l.next() if err == errIncompleteLine { // An incomplete line is fine as long as it doesn't @@ -271,8 +285,8 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt line := l.line() - // Skip first four fields. - for range 4 { + // Skip first three fields. + for range 3 { i := bytealg.IndexByte(line, ' ') if i < 0 { return 0, errMalformedFile @@ -280,11 +294,23 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt line = line[i+1:] } - // (5) mount point: mount point relative to the process's root + // (4) root: root of the mount within the filesystem i := bytealg.IndexByte(line, ' ') if i < 0 { return 0, errMalformedFile } + root := line[:i] + if len(root) == 0 || root[0] != '/' { + // We rely on this in hasPathPrefix. + return 0, errMalformedFile + } + line = line[i+1:] + + // (5) mount point: mount point relative to the process's root + i = bytealg.IndexByte(line, ' ') + if i < 0 { + return 0, errMalformedFile + } mnt := line[:i] line = line[i+1:] @@ -313,53 +339,103 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt ftype := line[:i] line = line[i+1:] - if string(ftype) != "cgroup" && string(ftype) != "cgroup2" { - continue - } - - // As in findCPUPath, cgroup v1 with a CPU controller takes - // precendence over cgroup v2. - if string(ftype) == "cgroup2" { - // v2 hierarchy. - n, err = unescapePath(out, mnt) - if err != nil { - // Don't keep searching on error. The kernel - // should never produce broken escaping. - return n, err + switch version { + case V1: + if string(ftype) != "cgroup" { + continue } - // Keep searching, we might find a v1 hierarchy with a - // CPU controller, which takes precedence. + // (10) mount source: filesystem specific information or "none" + i = bytealg.IndexByte(line, ' ') + if i < 0 { + return 0, errMalformedFile + } + // Don't care about mount source. + line = line[i+1:] + + // (11) super options: per super block options + if !containsCPU(line) { + continue + } + case V2: + if string(ftype) != "cgroup2" { + continue + } + default: + throw("impossible cgroup version") + panic("unreachable") + } + + // Check cgroup is in the root. + // If the cgroup is /sandbox/container, the matching mount point root could be + // /sandbox/container, /sandbox, or / + rootLen, err := unescapePath(root, root) + if err != nil { + return 0, err + } + root = root[:rootLen] + if !hasPathPrefix(cgroup, root) { + continue // not matched, this is not the mount point we're looking for + } + + // Cutoff the root from cgroup, ensure rel starts with '/' or is empty. + rel := cgroup[rootLen:] + if rootLen == 1 && len(cgroup) > 1 { + // root is "/", but cgroup is not. Keep full cgroup path. + rel = cgroup + } + if hasPathPrefix(rel, []byte("/..")) { + // the cgroup is out of current cgroup namespace, and this mount point + // cannot reach that cgroup. + // + // e.g. If the process is in cgroup /init, but in a cgroup namespace + // rooted at /sandbox/container, /proc/self/cgroup will show /../../init. + // we can reach it if the mount point root is + // /../.. or /../../init, but not if it is /.. or / + // While mount point with root /../../.. should able to reach the cgroup, + // we don't know the path to the cgroup within that mount point. continue } - // (10) mount source: filesystem specific information or "none" - i = bytealg.IndexByte(line, ' ') - if i < 0 { - return 0, errMalformedFile + // All conditions met, compose the full path. + // Copy rel to the correct place first, it may overlap with out. + n := unescapedLen(mnt) + if n+len(rel) > len(out) { + return 0, errPathTooLong } - // Don't care about mount source. - line = line[i+1:] - - // (11) super options: per super block options - superOpt := line - - // v1 hierarchy - if containsCPU(superOpt) { - // Found a v1 CPU controller. This must be the - // only one, so we're done. - return unescapePath(out, mnt) + copy(out[n:], rel) + n2, err := unescapePath(out[:n], mnt) + if err != nil { + return 0, err } + if n2 != n { + throw("wrong unescaped len") + } + return n + len(rel), nil } - if n == 0 { - // Found nothing. - return 0, ErrNoCgroup - } - - return n, nil + // Found nothing. + return 0, ErrNoCgroup } -var errInvalidEscape error = stringError("invalid path escape sequence") +func hasPathPrefix(p, prefix []byte) bool { + i := len(prefix) + if i == 1 { + return true // root contains everything + } + if len(p) < i || !bytealg.Equal(prefix, p[:i]) { + return false + } + return len(p) == i || p[i] == '/' // must match at path boundary +} + +var ( + errInvalidEscape error = stringError("invalid path escape sequence") + errPathTooLong error = stringError("path too long") +) + +func unescapedLen(in []byte) int { + return len(in) - bytealg.Count(in, byte('\\'))*3 +} // unescapePath copies in to out, unescaping escape sequences generated by // Linux's show_path. @@ -367,20 +443,21 @@ var errInvalidEscape error = stringError("invalid path escape sequence") // That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences, // like '\040' for space. // -// out must be at least as large as in. +// Caller must ensure that out at least has unescapedLen(in) bytes. +// in and out may alias; in-place unescaping is supported. // // Returns the number of bytes written to out. // // Also see escapePath in cgroup_linux_test.go. func unescapePath(out []byte, in []byte) (int, error) { - // Not strictly necessary, but simplifies the implementation and will - // always hold in users. - if len(out) < len(in) { - throw("output too small") - } - var outi, ini int for ini < len(in) { + if outi >= len(out) { + // given that caller already ensured out is long enough, this + // is only possible if there are malformed escape sequences + // we have not parsed yet. + return outi, errInvalidEscape + } c := in[ini] if c != '\\' { out[outi] = c diff --git a/src/internal/runtime/cgroup/cgroup_linux.go b/src/internal/runtime/cgroup/cgroup_linux.go index 5e3ee0d2c2c..d9add2188ce 100644 --- a/src/internal/runtime/cgroup/cgroup_linux.go +++ b/src/internal/runtime/cgroup/cgroup_linux.go @@ -211,44 +211,26 @@ func FindCPU(out []byte, scratch []byte) (int, Version, error) { checkBufferSize(scratch, ParseSize) // The cgroup path is + . - // - // This is racy if our cgroup is changed while this runs. For example, - // initially there is only a cgroup v2 mount and we are not in a - // cgroup. After, there a cgroup v1 mount with a CPU controller and we - // are placed in a cgroup in this hierarchy. In that case, findCPUMount - // could pick the v2 mount, and findCPURelativePath could find the v2 - // relative path. - // - // In this case we'll later fail to read the cgroup files and fall back - // to assuming no cgroup. + // relative path is the cgroup relative to the mount root. - n, err := FindCPUMountPoint(out, scratch) + n, version, err := FindCPUCgroup(out, scratch) if err != nil { return 0, 0, err } - // The relative path always starts with /, so we can directly append it - // to the mount point. - n2, version, err := FindCPURelativePath(out[n:], scratch) - if err != nil { - return 0, 0, err - } - n += n2 - - return n, version, nil + n, err = FindCPUMountPoint(out, out[:n], version, scratch) + return n, version, err } -// FindCPURelativePath finds the path to the CPU cgroup that this process is a member of -// relative to the root of the cgroup mount and places it in out. scratch is a -// scratch buffer for internal use. +// FindCPUCgroup finds the path to the CPU cgroup that this process is a member of +// and places it in out. scratch is a scratch buffer for internal use. // -// out must have length PathSize minus the size of the cgroup mount root (if -// known). scratch must have length ParseSize. +// out must have length PathSize. scratch must have length ParseSize. // // Returns the number of bytes written to out and the cgroup version (1 or 2). // // Returns ErrNoCgroup if the process is not in a CPU cgroup. -func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) { +func FindCPUCgroup(out []byte, scratch []byte) (int, Version, error) { path := []byte("/proc/self/cgroup\x00") fd, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0) if errno == linux.ENOENT { @@ -259,7 +241,7 @@ func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) { // The relative path always starts with /, so we can directly append it // to the mount point. - n, version, err := parseCPURelativePath(fd, linux.Read, out[:], scratch) + n, version, err := parseCPUCgroup(fd, linux.Read, out[:], scratch) if err != nil { linux.Close(fd) return 0, 0, err @@ -269,15 +251,17 @@ func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) { return n, version, nil } -// FindCPUMountPoint finds the root of the CPU cgroup mount places it in out. +// FindCPUMountPoint finds the mount point containing the specified cgroup and +// version with cpu controller, and compose the full path to the cgroup in out. // scratch is a scratch buffer for internal use. // -// out must have length PathSize. scratch must have length ParseSize. +// out must have length PathSize, may overlap with cgroup. +// scratch must have length ParseSize. // // Returns the number of bytes written to out. // -// Returns ErrNoCgroup if the process is not in a CPU cgroup. -func FindCPUMountPoint(out []byte, scratch []byte) (int, error) { +// Returns ErrNoCgroup if no matching mount point is found. +func FindCPUMountPoint(out, cgroup []byte, version Version, scratch []byte) (int, error) { checkBufferSize(out, PathSize) checkBufferSize(scratch, ParseSize) @@ -289,7 +273,7 @@ func FindCPUMountPoint(out []byte, scratch []byte) (int, error) { return 0, errSyscallFailed } - n, err := parseCPUMount(fd, linux.Read, out, scratch) + n, err := parseCPUMount(fd, linux.Read, out, cgroup, version, scratch) if err != nil { linux.Close(fd) return 0, err diff --git a/src/internal/runtime/cgroup/cgroup_test.go b/src/internal/runtime/cgroup/cgroup_test.go index a4ffdf3ba17..79263821c3c 100644 --- a/src/internal/runtime/cgroup/cgroup_test.go +++ b/src/internal/runtime/cgroup/cgroup_test.go @@ -12,8 +12,6 @@ import ( "testing" ) -const _PATH_MAX = 4096 - func TestParseV1Number(t *testing.T) { tests := []struct { name string @@ -156,7 +154,22 @@ func TestParseV2Limit(t *testing.T) { } } -func TestParseCPURelativePath(t *testing.T) { +func readString(contents string) func(fd int, b []byte) (int, uintptr) { + r := strings.NewReader(contents) + return func(fd int, b []byte) (int, uintptr) { + n, err := r.Read(b) + if err != nil && err != io.EOF { + const dummyErrno = 42 + return n, dummyErrno + } + return n, 0 + } +} + +func TestParseCPUCgroup(t *testing.T) { + veryLongPathName := strings.Repeat("a", cgroup.PathSize+10) + evenLongerPathName := strings.Repeat("a", cgroup.ParseSize+10) + tests := []struct { name string contents string @@ -169,6 +182,16 @@ func TestParseCPURelativePath(t *testing.T) { contents: "", wantErr: true, }, + { + name: "too-long", + contents: "0::/" + veryLongPathName + "\n", + wantErr: true, + }, + { + name: "too-long-line", + contents: "0::/" + evenLongerPathName + "\n", + wantErr: true, + }, { name: "v1", contents: `2:cpu,cpuacct:/a/b/cpu @@ -196,19 +219,9 @@ func TestParseCPURelativePath(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - r := strings.NewReader(tc.contents) - read := func(fd int, b []byte) (int, uintptr) { - n, err := r.Read(b) - if err != nil && err != io.EOF { - const dummyErrno = 42 - return n, dummyErrno - } - return n, 0 - } - var got [cgroup.PathSize]byte var scratch [cgroup.ParseSize]byte - n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:]) + n, gotVer, err := cgroup.ParseCPUCgroup(0, readString(tc.contents), got[:], scratch[:]) if (err != nil) != tc.wantErr { t.Fatalf("parseCPURelativePath got err %v want %v", err, tc.wantErr) } @@ -224,6 +237,25 @@ func TestParseCPURelativePath(t *testing.T) { } } +func TestParseCPUCgroupMalformed(t *testing.T) { + for _, contents := range []string{ + "\n", + "0\n", + "0:\n", + "0::\n", + "0::a\n", + } { + t.Run("", func(t *testing.T) { + var got [cgroup.PathSize]byte + var scratch [cgroup.ParseSize]byte + n, v, err := cgroup.ParseCPUCgroup(0, readString(contents), got[:], scratch[:]) + if err != cgroup.ErrMalformedFile { + t.Errorf("ParseCPUCgroup got %q (v%d), %v, want ErrMalformedFile", string(got[:n]), v, err) + } + }) + } +} + func TestContainsCPU(t *testing.T) { tests := []struct { in string @@ -279,9 +311,21 @@ func TestParseCPUMount(t *testing.T) { overlayLongLowerDir += fmt.Sprintf(":%s%d", lowerPath, i) } + var longPath [4090]byte + for i := range longPath { + longPath[i] = byte(i) + } + escapedLongPath := escapePath(string(longPath[:])) + if len(escapedLongPath) <= cgroup.PathSize { + // ensure we actually support over PathSize long escaped path + t.Fatalf("escapedLongPath is too short to test") + } + tests := []struct { name string contents string + cgroup string + version cgroup.Version want string wantErr bool }{ @@ -290,6 +334,20 @@ func TestParseCPUMount(t *testing.T) { contents: "", wantErr: true, }, + { + name: "invalid-root", + contents: "56 22 0:40 /\\1 /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct\n", + cgroup: "/", + version: cgroup.V1, + wantErr: true, + }, + { + name: "invalid-mount", + contents: "56 22 0:40 / /sys/fs/cgroup/\\1 rw - cgroup cgroup rw,cpu,cpuacct\n", + cgroup: "/", + version: cgroup.V1, + wantErr: true, + }, { name: "v1", contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw @@ -301,7 +359,9 @@ func TestParseCPUMount(t *testing.T) { 58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net 59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset `, - want: "/sys/fs/cgroup/cpu", + cgroup: "/", + version: cgroup.V1, + want: "/sys/fs/cgroup/cpu", }, { name: "v2", @@ -310,7 +370,9 @@ func TestParseCPUMount(t *testing.T) { 21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw 25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw `, - want: "/sys/fs/cgroup", + cgroup: "/", + version: cgroup.V2, + want: "/sys/fs/cgroup", }, { name: "mixed", @@ -324,7 +386,25 @@ func TestParseCPUMount(t *testing.T) { 58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net 59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset `, - want: "/sys/fs/cgroup/cpu", + cgroup: "/", + version: cgroup.V1, + want: "/sys/fs/cgroup/cpu", + }, + { + name: "mixed-choose-v2", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory +54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io +56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct +58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net +59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset +`, + cgroup: "/", + version: cgroup.V2, + want: "/sys/fs/cgroup", }, { name: "v2-escaped", @@ -333,7 +413,9 @@ func TestParseCPUMount(t *testing.T) { 21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw 25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw `, - want: `/sys/fs/cgroup/tab tab`, + cgroup: "/", + version: cgroup.V2, + want: `/sys/fs/cgroup/tab tab`, }, { // Overly long line on a different mount doesn't matter. @@ -344,25 +426,125 @@ func TestParseCPUMount(t *testing.T) { 262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work 25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw `, - want: "/sys/fs/cgroup", + cgroup: "/", + version: cgroup.V2, + want: "/sys/fs/cgroup", + }, + { + name: "long-escaped-path", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +`, + cgroup: "/", + version: cgroup.V2, + want: "/sys/" + string(longPath[:]), + }, + { + name: "too-long-escaped-path", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +`, + cgroup: "/container", // compared to above, this makes the path too long + version: cgroup.V2, + wantErr: true, + }, + { + name: "non-root_mount", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 /sand /unrelated/cgroup1 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +25 21 0:22 /stone /unrelated/cgroup2 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +25 21 0:22 /sandbox/container/group /sys/fs/cgroup/mygroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +25 21 0:22 / /ignored/second/match rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +`, + cgroup: "/sandbox/container", + version: cgroup.V2, + want: "/sys/fs/cgroup/container", + }, + { + name: "v2-escaped-root", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 /tab\011tab /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +`, + cgroup: "/tab tab/container", + version: cgroup.V2, + want: `/sys/fs/cgroup/container`, + }, + { + name: "non-root_cgroup", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +`, + cgroup: "/sandbox/container", + version: cgroup.V2, + want: "/sys/fs/cgroup/sandbox/container", + }, + { + name: "mixed_non-root", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw +49 22 0:37 /sandbox /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory +54 22 0:38 /sandbox /sys/fs/cgroup/io rw - cgroup cgroup rw,io +56 22 0:40 /sand /unrelated/cgroup1 rw - cgroup cgroup rw,cpu,cpuacct +56 22 0:40 /stone /unrelated/cgroup2 rw - cgroup cgroup rw,cpu,cpuacct +56 22 0:40 /sandbox /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct +56 22 0:40 /sandbox/container/group /sys/fs/cgroup/cpu/mygroup rw - cgroup cgroup rw,cpu,cpuacct +56 22 0:40 / /ignored/second/match rw - cgroup cgroup rw,cpu,cpuacct +58 22 0:42 /sandbox /sys/fs/cgroup/net rw - cgroup cgroup rw,net +59 22 0:43 /sandbox /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset +`, + cgroup: "/sandbox/container", + version: cgroup.V1, + want: "/sys/fs/cgroup/cpu/container", + }, + { + // to see an example of this, for a PID in a cgroup namespace, run: + // nsenter -t -C -- cat /proc/self/cgroup + // nsenter -t -C -- grep cgroup /proc/self/mountinfo + // /mnt can be generated with `mount --bind /sys/fs/cgroup/kubepods.slice /mnt`, + // assuming PID is in cgroup /kubepods.slice + name: "out_of_namespace", + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw +29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`, + cgroup: "/../../../../init.scope", + version: cgroup.V2, + want: "/sys/fs/cgroup/init.scope", + }, + { + name: "out_of_namespace-root", // the process is directly in the root cgroup + contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw +20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw +21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw +1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw +29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`, + cgroup: "/../../../..", + version: cgroup.V2, + want: "/sys/fs/cgroup", }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - r := strings.NewReader(tc.contents) - read := func(fd int, b []byte) (int, uintptr) { - n, err := r.Read(b) - if err != nil && err != io.EOF { - const dummyErrno = 42 - return n, dummyErrno - } - return n, 0 - } - var got [cgroup.PathSize]byte var scratch [cgroup.ParseSize]byte - n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:]) + n := copy(got[:], tc.cgroup) + n, err := cgroup.ParseCPUMount(0, readString(tc.contents), got[:], + got[:n], tc.version, scratch[:]) if (err != nil) != tc.wantErr { t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr) } @@ -374,6 +556,31 @@ func TestParseCPUMount(t *testing.T) { } } +func TestParseCPUMountMalformed(t *testing.T) { + for _, contents := range []string{ + "\n", + "22\n", + "22 1 8:1\n", + "22 1 8:1 /\n", + "22 1 8:1 / /cgroup\n", + "22 1 8:1 / /cgroup rw\n", + "22 1 8:1 / /cgroup rw -\n", + "22 1 8:1 / /cgroup rw - \n", + "22 1 8:1 / /cgroup rw - cgroup\n", + "22 1 8:1 / /cgroup rw - cgroup cgroup\n", + "22 1 8:1 a /cgroup rw - cgroup cgroup cpu\n", + } { + t.Run("", func(t *testing.T) { + var got [cgroup.PathSize]byte + var scratch [cgroup.ParseSize]byte + n, err := cgroup.ParseCPUMount(0, readString(contents), got[:], []byte("/"), cgroup.V1, scratch[:]) + if err != cgroup.ErrMalformedFile { + t.Errorf("parseCPUMount got %q, %v, want ErrMalformedFile", string(got[:n]), err) + } + }) + } +} + // escapePath performs escaping equivalent to Linux's show_path. // // That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences, @@ -453,9 +660,7 @@ b/c`, t.Run("unescapePath", func(t *testing.T) { for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - in := []byte(tc.escaped) - out := make([]byte, len(in)) + runTest := func(in, out []byte) { n, err := cgroup.UnescapePath(out, in) if err != nil { t.Errorf("unescapePath got err %v want nil", err) @@ -464,6 +669,15 @@ b/c`, if got != tc.unescaped { t.Errorf("unescapePath got %q want %q", got, tc.escaped) } + } + t.Run(tc.name, func(t *testing.T) { + in := []byte(tc.escaped) + out := make([]byte, len(in)) + runTest(in, out) + }) + t.Run("inplace/"+tc.name, func(t *testing.T) { + in := []byte(tc.escaped) + runTest(in, in) }) } }) diff --git a/src/internal/runtime/cgroup/export_test.go b/src/internal/runtime/cgroup/export_test.go index 55acdc0877e..d2eac001575 100644 --- a/src/internal/runtime/cgroup/export_test.go +++ b/src/internal/runtime/cgroup/export_test.go @@ -21,6 +21,7 @@ func NewLineReader(fd int, scratch []byte, read func(fd int, b []byte) (int, uin var ( ErrEOF = errEOF ErrIncompleteLine = errIncompleteLine + ErrMalformedFile = errMalformedFile ) var ContainsCPU = containsCPU @@ -28,7 +29,7 @@ var ContainsCPU = containsCPU var ParseV1Number = parseV1Number var ParseV2Limit = parseV2Limit -var ParseCPURelativePath = parseCPURelativePath +var ParseCPUCgroup = parseCPUCgroup var ParseCPUMount = parseCPUMount var UnescapePath = unescapePath