internal/runtime/cgroup: fix path on non-root mount point

We should trim the mount root (4th field in /proc/self/mountinfo) from
the cgroup path read from /proc/self/cgroup before appending it to the
mount point.  Non-root mount points are very common in containers with
cgroup v1.

parseCPURelativePath is renamed to parseCPUCgroup, as it is unclear what
it is relative to. cgroups(7) says "This pathname is relative to the
mount point of the hierarchy." It should mean the root of the hierarchy,
and we cannot concatenate it to an arbitrary cgroup mount point. So just
use the word cgroup, since it parses /proc/self/cgroup.

It now returns errMalformedFile if the cgroup pathname does not start
with "/", and errPathTooLong if the pathname can't fit into the buffer.
We already rely on this when composing the path, just make this explicit
to avoid incorrect paths.

We now parse cgroup first then parse the mount point accordingly.  We
consider the previously read cgroup pathname and version to ensure we
got the desired mount point.  The out buffer is reused to pass in the
cgroup, to avoid extra memory allocation.

This should also resolve the race mentioned in the comments, so removing
those comments.  If our cgroup changed between the two read syscalls, we
will stick with the cgroup read from /proc/self/cgroup. This is the same
behavior as cgroup change after FindCPU() returns, so nothing special to
comment about now.

parseCPUMount now returns an error when the combined path is too long, to
avoid a panic or truncation if we get a really long path from mountinfo.

cgrouptest is changed to use dev returned from stat() to detect the
filesystem boundary, since we don't return mount point and sub-path
separately now. This also avoids using os.Root since we don't handle
untrusted input here. os.Root is too complex, and the performance is
bad.

Fixes #76390

Change-Id: Ia9cbd7be3e58a2d51caf27a973fbd201dac06afc
Reviewed-on: https://go-review.googlesource.com/c/go/+/723241
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
胡玮文 2025-11-22 01:44:14 +08:00 committed by Gopher Robot
parent 6be5de4bc4
commit c2af9f14b4
5 changed files with 424 additions and 156 deletions

View file

@ -50,9 +50,8 @@ func (c *CgroupV2) SetCPUMax(quota, period int64) error {
//
// This must not be used in parallel tests, as it affects the entire process.
func InCgroupV2(t *testing.T, fn func(*CgroupV2)) {
mount, rel := findCurrent(t)
parent := findOwnedParent(t, mount, rel)
orig := filepath.Join(mount, rel)
orig := findCurrent(t)
parent := findOwnedParent(t, orig)
// Make sure the parent allows children to control cpu.
b, err := os.ReadFile(filepath.Join(parent, "cgroup.subtree_control"))
@ -93,34 +92,25 @@ func InCgroupV2(t *testing.T, fn func(*CgroupV2)) {
fn(c)
}
// Returns the mount and relative directory of the current cgroup the process
// is in.
func findCurrent(t *testing.T) (string, string) {
// Returns the filesystem path to the current cgroup the process is in.
func findCurrent(t *testing.T) string {
// Find the path to our current CPU cgroup. Currently this package is
// only used for CPU cgroup testing, so the distinction of different
// controllers doesn't matter.
var scratch [cgroup.ParseSize]byte
buf := make([]byte, cgroup.PathSize)
n, err := cgroup.FindCPUMountPoint(buf, scratch[:])
n, ver, err := cgroup.FindCPU(buf, scratch[:])
if err != nil {
t.Skipf("cgroup: unable to find current cgroup mount: %v", err)
}
mount := string(buf[:n])
n, ver, err := cgroup.FindCPURelativePath(buf, scratch[:])
if err != nil {
t.Skipf("cgroup: unable to find current cgroup path: %v", err)
}
if ver != cgroup.V2 {
t.Skipf("cgroup: running on cgroup v%d want v2", ver)
}
rel := string(buf[1:n]) // The returned path always starts with /, skip it.
rel = filepath.Join(".", rel) // Make sure this isn't empty string at root.
return mount, rel
return string(buf[:n])
}
// Returns a parent directory in which we can create our own cgroup subdirectory.
func findOwnedParent(t *testing.T, mount, rel string) string {
func findOwnedParent(t *testing.T, orig string) string {
// There are many ways cgroups may be set up on a system. We don't try
// to cover all of them, just common ones.
//
@ -142,7 +132,7 @@ func findOwnedParent(t *testing.T, mount, rel string) string {
// We want to create our own subdirectory that we can migrate into and
// then manipulate at will. It is tempting to create a new subdirectory
// inside the current cgroup we are already in, however that will likey
// inside the current cgroup we are already in, however that will likely
// not work. cgroup v2 only allows processes to be in leaf cgroups. Our
// current cgroup likely contains multiple processes (at least this one
// and the cmd/go test runner). If we make a subdirectory and try to
@ -166,27 +156,29 @@ func findOwnedParent(t *testing.T, mount, rel string) string {
// is empty. As far as I tell, the only purpose of this is to allow
// reorganizing processes into a new set of subdirectories and then
// adding controllers once done.
root, err := os.OpenRoot(mount)
var stat syscall.Stat_t
err := syscall.Stat(orig, &stat)
if err != nil {
t.Fatalf("error opening cgroup mount root: %v", err)
t.Fatalf("error stating orig cgroup: %v", err)
}
uid := os.Getuid()
var prev string
for rel != "." {
fi, err := root.Stat(rel)
cur := filepath.Dir(orig)
for cur != "/" {
var curStat syscall.Stat_t
err = syscall.Stat(cur, &curStat)
if err != nil {
t.Fatalf("error stating cgroup path: %v", err)
}
st := fi.Sys().(*syscall.Stat_t)
if int(st.Uid) != uid {
// Stop at first directory we don't own.
if int(curStat.Uid) != uid || curStat.Dev != stat.Dev {
// Stop at first directory we don't own or filesystem boundary.
break
}
prev = rel
rel = filepath.Join(rel, "..")
prev = cur
cur = filepath.Dir(cur)
}
if prev == "" {
@ -194,7 +186,7 @@ func findOwnedParent(t *testing.T, mount, rel string) string {
}
// We actually want the last directory where we were the owner.
return filepath.Join(mount, prev)
return prev
}
// Migrate the current process to the cgroup directory dst.

View file

@ -102,21 +102,23 @@ func parseV2Limit(buf []byte) (float64, bool, error) {
return float64(quota) / float64(period), true, nil
}
// Finds the path of the current process's CPU cgroup relative to the cgroup
// mount and writes it to out.
// Finds the path of the current process's CPU cgroup and writes it to out.
//
// fd is a file descriptor for /proc/self/cgroup.
// Returns the number of bytes written and the cgroup version (1 or 2).
func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
func parseCPUCgroup(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
// The format of each line is
//
// hierarchy-ID:controller-list:cgroup-path
//
// controller-list is comma-separated.
// See man 5 cgroup for more details.
//
// cgroup v2 has hierarchy-ID 0. If a v1 hierarchy contains "cpu", that
// is the CPU controller. Otherwise the v2 hierarchy (if any) is the
// CPU controller.
// CPU controller. It is not possible to mount the same controller
// simultaneously under both the v1 and the v2 hierarchies.
//
// See man 7 cgroups for more details.
//
// hierarchy-ID and controller-list have relatively small maximum
// sizes, and the path can be up to _PATH_MAX, so we need a bit more
@ -149,7 +151,7 @@ func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), ou
// hierarchy-ID:controller-list:cgroup-path
//
// controller-list is comma-separated.
// See man 5 cgroup for more details.
// See man 7 cgroups for more details.
i := bytealg.IndexByte(line, ':')
if i < 0 {
return 0, 0, errMalformedFile
@ -167,6 +169,15 @@ func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), ou
line = line[i+1:]
path := line
if len(path) == 0 || path[0] != '/' {
// We rely on this when composing the full path.
return 0, 0, errMalformedFile
}
if len(path) > len(out) {
// Should not be possible. If we really get a very long cgroup path,
// read /proc/self/cgroup will fail with ENAMETOOLONG.
return 0, 0, errPathTooLong
}
if string(hierarchy) == "0" {
// v2 hierarchy.
@ -214,9 +225,11 @@ func containsCPU(b []byte) bool {
return false
}
// Returns the mount point for the cpu cgroup controller (v1 or v2) from
// /proc/self/mountinfo.
func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) {
// Returns the full filesystem path to the specified cgroup, found via the
// mount point of the given cgroup version that has the cpu controller.
//
// fd is a file descriptor for /proc/self/mountinfo.
// Returns the number of bytes written.
func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out, cgroup []byte, version Version, scratch []byte) (int, error) {
// The format of each line is:
//
// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
@ -240,8 +253,13 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
// carriage return. Those are escaped. See Linux show_mountinfo ->
// show_path. We must unescape before returning.
//
// We return the mount point (5) if the filesystem type (9) is cgroup2,
// or cgroup with "cpu" in the super options (11).
// A mount point matches if the filesystem type (9) is cgroup2,
// or cgroup with "cpu" in the super options (11),
// and the cgroup is in the root (4). If there are multiple matches,
// the first one is selected.
//
// We return full cgroup path, which is the mount point (5) +
// cgroup parameter without the root (4) prefix.
//
// (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
// small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
@ -250,11 +268,7 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
l := newLineReader(fd, scratch, read)
// Bytes written to out.
n := 0
for {
//incomplete := false
err := l.next()
if err == errIncompleteLine {
// An incomplete line is fine as long as it doesn't
@ -271,8 +285,8 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
line := l.line()
// Skip first four fields.
for range 4 {
// Skip first three fields.
for range 3 {
i := bytealg.IndexByte(line, ' ')
if i < 0 {
return 0, errMalformedFile
@ -280,11 +294,23 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
line = line[i+1:]
}
// (5) mount point: mount point relative to the process's root
// (4) root: root of the mount within the filesystem
i := bytealg.IndexByte(line, ' ')
if i < 0 {
return 0, errMalformedFile
}
root := line[:i]
if len(root) == 0 || root[0] != '/' {
// We rely on this in hasPathPrefix.
return 0, errMalformedFile
}
line = line[i+1:]
// (5) mount point: mount point relative to the process's root
i = bytealg.IndexByte(line, ' ')
if i < 0 {
return 0, errMalformedFile
}
mnt := line[:i]
line = line[i+1:]
@ -313,25 +339,11 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
ftype := line[:i]
line = line[i+1:]
if string(ftype) != "cgroup" && string(ftype) != "cgroup2" {
switch version {
case V1:
if string(ftype) != "cgroup" {
continue
}
// As in findCPUPath, cgroup v1 with a CPU controller takes
// precendence over cgroup v2.
if string(ftype) == "cgroup2" {
// v2 hierarchy.
n, err = unescapePath(out, mnt)
if err != nil {
// Don't keep searching on error. The kernel
// should never produce broken escaping.
return n, err
}
// Keep searching, we might find a v1 hierarchy with a
// CPU controller, which takes precedence.
continue
}
// (10) mount source: filesystem specific information or "none"
i = bytealg.IndexByte(line, ' ')
if i < 0 {
@ -341,25 +353,89 @@ func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byt
line = line[i+1:]
// (11) super options: per super block options
superOpt := line
// v1 hierarchy
if containsCPU(superOpt) {
// Found a v1 CPU controller. This must be the
// only one, so we're done.
return unescapePath(out, mnt)
if !containsCPU(line) {
continue
}
case V2:
if string(ftype) != "cgroup2" {
continue
}
default:
throw("impossible cgroup version")
panic("unreachable")
}
// Check cgroup is in the root.
// If the cgroup is /sandbox/container, the matching mount point root could be
// /sandbox/container, /sandbox, or /
rootLen, err := unescapePath(root, root)
if err != nil {
return 0, err
}
root = root[:rootLen]
if !hasPathPrefix(cgroup, root) {
continue // not matched, this is not the mount point we're looking for
}
// Cut off the root from cgroup, ensuring rel starts with '/' or is empty.
rel := cgroup[rootLen:]
if rootLen == 1 && len(cgroup) > 1 {
// root is "/", but cgroup is not. Keep full cgroup path.
rel = cgroup
}
if hasPathPrefix(rel, []byte("/..")) {
// the cgroup is out of current cgroup namespace, and this mount point
// cannot reach that cgroup.
//
// e.g. If the process is in cgroup /init, but in a cgroup namespace
// rooted at /sandbox/container, /proc/self/cgroup will show /../../init.
// we can reach it if the mount point root is
// /../.. or /../../init, but not if it is /.. or /
// While a mount point with root /../../.. should be able to reach the cgroup,
// we don't know the path to the cgroup within that mount point.
continue
}
// All conditions met, compose the full path.
// Copy rel to the correct place first, it may overlap with out.
n := unescapedLen(mnt)
if n+len(rel) > len(out) {
return 0, errPathTooLong
}
copy(out[n:], rel)
n2, err := unescapePath(out[:n], mnt)
if err != nil {
return 0, err
}
if n2 != n {
throw("wrong unescaped len")
}
return n + len(rel), nil
}
if n == 0 {
// Found nothing.
return 0, ErrNoCgroup
}
return n, nil
}
var errInvalidEscape error = stringError("invalid path escape sequence")
// hasPathPrefix reports whether the path p is located at or below the
// directory prefix.
//
// Both arguments are expected to be absolute paths (callers check for the
// leading '/' before calling), so a one-byte prefix is taken to be the root
// "/" and matches every p. Otherwise prefix must either equal p or be
// followed in p by a '/', so that "/sand" is not a prefix of "/sandbox".
func hasPathPrefix(p, prefix []byte) bool {
	n := len(prefix)
	switch {
	case n == 1:
		// The root contains everything.
		return true
	case len(p) < n:
		// p is too short to contain prefix at all.
		return false
	case !bytealg.Equal(prefix, p[:n]):
		return false
	case len(p) == n:
		// Exact match.
		return true
	}
	// The match must end at a path component boundary.
	return p[n] == '/'
}
// Errors reported while decoding and composing cgroup paths.
var (
// errInvalidEscape is returned by unescapePath when the input contains an
// escape sequence that cannot be decoded.
errInvalidEscape error = stringError("invalid path escape sequence")
// errPathTooLong is returned when a cgroup path, or the mount point combined
// with the cgroup path, does not fit in the caller-supplied output buffer.
errPathTooLong error = stringError("path too long")
)
// unescapedLen returns the number of bytes in will occupy once unescapePath
// has decoded it.
//
// Linux's show_path writes every escaped byte as a four-byte octal sequence
// ('\' followed by three digits, e.g. "\040" for space), so each backslash
// accounts for three bytes that disappear when unescaping. The result is
// exact only for well-formed input.
//
// The result is clamped at zero. For malformed input ending in a truncated
// escape (for example a lone "\") the subtraction would go negative, and
// callers slice their output buffer with the result. With the clamp,
// unescapePath is handed a too-short buffer and reports errInvalidEscape
// rather than the caller panicking on a negative slice index.
func unescapedLen(in []byte) int {
	return max(len(in)-bytealg.Count(in, byte('\\'))*3, 0)
}
// unescapePath copies in to out, unescaping escape sequences generated by
// Linux's show_path.
@ -367,20 +443,21 @@ var errInvalidEscape error = stringError("invalid path escape sequence")
// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
// like '\040' for space.
//
// out must be at least as large as in.
// Caller must ensure that out at least has unescapedLen(in) bytes.
// in and out may alias; in-place unescaping is supported.
//
// Returns the number of bytes written to out.
//
// Also see escapePath in cgroup_linux_test.go.
func unescapePath(out []byte, in []byte) (int, error) {
// Not strictly necessary, but simplifies the implementation and will
// always hold in users.
if len(out) < len(in) {
throw("output too small")
}
var outi, ini int
for ini < len(in) {
if outi >= len(out) {
// given that caller already ensured out is long enough, this
// is only possible if there are malformed escape sequences
// we have not parsed yet.
return outi, errInvalidEscape
}
c := in[ini]
if c != '\\' {
out[outi] = c

View file

@ -211,44 +211,26 @@ func FindCPU(out []byte, scratch []byte) (int, Version, error) {
checkBufferSize(scratch, ParseSize)
// The cgroup path is <cgroup mount point> + <relative path>.
//
// This is racy if our cgroup is changed while this runs. For example,
// initially there is only a cgroup v2 mount and we are not in a
// cgroup. After, there a cgroup v1 mount with a CPU controller and we
// are placed in a cgroup in this hierarchy. In that case, findCPUMount
// could pick the v2 mount, and findCPURelativePath could find the v2
// relative path.
//
// In this case we'll later fail to read the cgroup files and fall back
// to assuming no cgroup.
// relative path is the cgroup relative to the mount root.
n, err := FindCPUMountPoint(out, scratch)
n, version, err := FindCPUCgroup(out, scratch)
if err != nil {
return 0, 0, err
}
// The relative path always starts with /, so we can directly append it
// to the mount point.
n2, version, err := FindCPURelativePath(out[n:], scratch)
if err != nil {
return 0, 0, err
}
n += n2
return n, version, nil
n, err = FindCPUMountPoint(out, out[:n], version, scratch)
return n, version, err
}
// FindCPURelativePath finds the path to the CPU cgroup that this process is a member of
// relative to the root of the cgroup mount and places it in out. scratch is a
// scratch buffer for internal use.
// FindCPUCgroup finds the path to the CPU cgroup that this process is a member of
// and places it in out. scratch is a scratch buffer for internal use.
//
// out must have length PathSize minus the size of the cgroup mount root (if
// known). scratch must have length ParseSize.
// out must have length PathSize. scratch must have length ParseSize.
//
// Returns the number of bytes written to out and the cgroup version (1 or 2).
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup.
func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
func FindCPUCgroup(out []byte, scratch []byte) (int, Version, error) {
path := []byte("/proc/self/cgroup\x00")
fd, errno := linux.Open(&path[0], linux.O_RDONLY|linux.O_CLOEXEC, 0)
if errno == linux.ENOENT {
@ -259,7 +241,7 @@ func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
// The relative path always starts with /, so we can directly append it
// to the mount point.
n, version, err := parseCPURelativePath(fd, linux.Read, out[:], scratch)
n, version, err := parseCPUCgroup(fd, linux.Read, out[:], scratch)
if err != nil {
linux.Close(fd)
return 0, 0, err
@ -269,15 +251,17 @@ func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
return n, version, nil
}
// FindCPUMountPoint finds the root of the CPU cgroup mount places it in out.
// FindCPUMountPoint finds the mount point containing the specified cgroup and
// version with cpu controller, and composes the full path to the cgroup in out.
// scratch is a scratch buffer for internal use.
//
// out must have length PathSize. scratch must have length ParseSize.
// out must have length PathSize and may overlap with cgroup.
// scratch must have length ParseSize.
//
// Returns the number of bytes written to out.
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup.
func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
// Returns ErrNoCgroup if no matching mount point is found.
func FindCPUMountPoint(out, cgroup []byte, version Version, scratch []byte) (int, error) {
checkBufferSize(out, PathSize)
checkBufferSize(scratch, ParseSize)
@ -289,7 +273,7 @@ func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
return 0, errSyscallFailed
}
n, err := parseCPUMount(fd, linux.Read, out, scratch)
n, err := parseCPUMount(fd, linux.Read, out, cgroup, version, scratch)
if err != nil {
linux.Close(fd)
return 0, err

View file

@ -12,8 +12,6 @@ import (
"testing"
)
const _PATH_MAX = 4096
func TestParseV1Number(t *testing.T) {
tests := []struct {
name string
@ -156,7 +154,22 @@ func TestParseV2Limit(t *testing.T) {
}
}
func TestParseCPURelativePath(t *testing.T) {
// readString returns a read callback, with the same shape as the raw read
// function the parsers take, that serves contents in successive chunks.
//
// The fd argument is ignored. Each call fills b with the next unread bytes
// and returns the byte count with a zero errno; once contents is exhausted it
// returns 0, 0. Any read error other than io.EOF is reported as a dummy
// non-zero errno.
func readString(contents string) func(fd int, b []byte) (int, uintptr) {
	src := strings.NewReader(contents)
	return func(_ int, buf []byte) (int, uintptr) {
		n, err := src.Read(buf)
		switch err {
		case nil, io.EOF:
			// End of input is signalled by a zero-length read, not an errno.
			return n, 0
		}
		const dummyErrno = 42
		return n, dummyErrno
	}
}
func TestParseCPUCgroup(t *testing.T) {
veryLongPathName := strings.Repeat("a", cgroup.PathSize+10)
evenLongerPathName := strings.Repeat("a", cgroup.ParseSize+10)
tests := []struct {
name string
contents string
@ -169,6 +182,16 @@ func TestParseCPURelativePath(t *testing.T) {
contents: "",
wantErr: true,
},
{
name: "too-long",
contents: "0::/" + veryLongPathName + "\n",
wantErr: true,
},
{
name: "too-long-line",
contents: "0::/" + evenLongerPathName + "\n",
wantErr: true,
},
{
name: "v1",
contents: `2:cpu,cpuacct:/a/b/cpu
@ -196,19 +219,9 @@ func TestParseCPURelativePath(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
r := strings.NewReader(tc.contents)
read := func(fd int, b []byte) (int, uintptr) {
n, err := r.Read(b)
if err != nil && err != io.EOF {
const dummyErrno = 42
return n, dummyErrno
}
return n, 0
}
var got [cgroup.PathSize]byte
var scratch [cgroup.ParseSize]byte
n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:])
n, gotVer, err := cgroup.ParseCPUCgroup(0, readString(tc.contents), got[:], scratch[:])
if (err != nil) != tc.wantErr {
t.Fatalf("parseCPURelativePath got err %v want %v", err, tc.wantErr)
}
@ -224,6 +237,25 @@ func TestParseCPURelativePath(t *testing.T) {
}
}
// TestParseCPUCgroupMalformed feeds ParseCPUCgroup a series of syntactically
// broken /proc/self/cgroup contents and expects ErrMalformedFile for each.
func TestParseCPUCgroupMalformed(t *testing.T) {
	malformed := []string{
		"\n",     // empty line
		"0\n",    // no ':' separator
		"0:\n",   // only one ':' separator
		"0::\n",  // empty cgroup path
		"0::a\n", // cgroup path does not start with '/'
	}
	for _, contents := range malformed {
		t.Run("", func(t *testing.T) {
			var (
				out     [cgroup.PathSize]byte
				scratch [cgroup.ParseSize]byte
			)
			n, ver, err := cgroup.ParseCPUCgroup(0, readString(contents), out[:], scratch[:])
			if err == cgroup.ErrMalformedFile {
				return
			}
			t.Errorf("ParseCPUCgroup got %q (v%d), %v, want ErrMalformedFile", string(out[:n]), ver, err)
		})
	}
}
func TestContainsCPU(t *testing.T) {
tests := []struct {
in string
@ -279,9 +311,21 @@ func TestParseCPUMount(t *testing.T) {
overlayLongLowerDir += fmt.Sprintf(":%s%d", lowerPath, i)
}
var longPath [4090]byte
for i := range longPath {
longPath[i] = byte(i)
}
escapedLongPath := escapePath(string(longPath[:]))
if len(escapedLongPath) <= cgroup.PathSize {
// ensure we actually support over PathSize long escaped path
t.Fatalf("escapedLongPath is too short to test")
}
tests := []struct {
name string
contents string
cgroup string
version cgroup.Version
want string
wantErr bool
}{
@ -290,6 +334,20 @@ func TestParseCPUMount(t *testing.T) {
contents: "",
wantErr: true,
},
{
name: "invalid-root",
contents: "56 22 0:40 /\\1 /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct\n",
cgroup: "/",
version: cgroup.V1,
wantErr: true,
},
{
name: "invalid-mount",
contents: "56 22 0:40 / /sys/fs/cgroup/\\1 rw - cgroup cgroup rw,cpu,cpuacct\n",
cgroup: "/",
version: cgroup.V1,
wantErr: true,
},
{
name: "v1",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
@ -301,6 +359,8 @@ func TestParseCPUMount(t *testing.T) {
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
cgroup: "/",
version: cgroup.V1,
want: "/sys/fs/cgroup/cpu",
},
{
@ -310,6 +370,8 @@ func TestParseCPUMount(t *testing.T) {
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/",
version: cgroup.V2,
want: "/sys/fs/cgroup",
},
{
@ -324,8 +386,26 @@ func TestParseCPUMount(t *testing.T) {
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
cgroup: "/",
version: cgroup.V1,
want: "/sys/fs/cgroup/cpu",
},
{
name: "mixed-choose-v2",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
cgroup: "/",
version: cgroup.V2,
want: "/sys/fs/cgroup",
},
{
name: "v2-escaped",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
@ -333,6 +413,8 @@ func TestParseCPUMount(t *testing.T) {
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/",
version: cgroup.V2,
want: `/sys/fs/cgroup/tab tab`,
},
{
@ -344,25 +426,125 @@ func TestParseCPUMount(t *testing.T) {
262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/",
version: cgroup.V2,
want: "/sys/fs/cgroup",
},
{
name: "long-escaped-path",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/",
version: cgroup.V2,
want: "/sys/" + string(longPath[:]),
},
{
name: "too-long-escaped-path",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/` + escapedLongPath + ` rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/container", // compared to above, this makes the path too long
version: cgroup.V2,
wantErr: true,
},
{
name: "non-root_mount",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 /sand /unrelated/cgroup1 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
25 21 0:22 /stone /unrelated/cgroup2 rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
25 21 0:22 /sandbox/container/group /sys/fs/cgroup/mygroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
25 21 0:22 / /ignored/second/match rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/sandbox/container",
version: cgroup.V2,
want: "/sys/fs/cgroup/container",
},
{
name: "v2-escaped-root",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 /tab\011tab /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/tab tab/container",
version: cgroup.V2,
want: `/sys/fs/cgroup/container`,
},
{
name: "non-root_cgroup",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
cgroup: "/sandbox/container",
version: cgroup.V2,
want: "/sys/fs/cgroup/sandbox/container",
},
{
name: "mixed_non-root",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 /sandbox /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
49 22 0:37 /sandbox /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
54 22 0:38 /sandbox /sys/fs/cgroup/io rw - cgroup cgroup rw,io
56 22 0:40 /sand /unrelated/cgroup1 rw - cgroup cgroup rw,cpu,cpuacct
56 22 0:40 /stone /unrelated/cgroup2 rw - cgroup cgroup rw,cpu,cpuacct
56 22 0:40 /sandbox /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
56 22 0:40 /sandbox/container/group /sys/fs/cgroup/cpu/mygroup rw - cgroup cgroup rw,cpu,cpuacct
56 22 0:40 / /ignored/second/match rw - cgroup cgroup rw,cpu,cpuacct
58 22 0:42 /sandbox /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 /sandbox /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
cgroup: "/sandbox/container",
version: cgroup.V1,
want: "/sys/fs/cgroup/cpu/container",
},
{
// to see an example of this, for a PID in a cgroup namespace, run:
// nsenter -t <PID> -C -- cat /proc/self/cgroup
// nsenter -t <PID> -C -- grep cgroup /proc/self/mountinfo
// /mnt can be generated with `mount --bind /sys/fs/cgroup/kubepods.slice /mnt`,
// assuming PID is in cgroup /kubepods.slice
name: "out_of_namespace",
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw
29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`,
cgroup: "/../../../../init.scope",
version: cgroup.V2,
want: "/sys/fs/cgroup/init.scope",
},
{
name: "out_of_namespace-root", // the process is directly in the root cgroup
contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
1243 61 0:26 /../../.. /mnt rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw
29 22 0:26 /../../../.. /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime shared:4 - cgroup2 cgroup2 rw`,
cgroup: "/../../../..",
version: cgroup.V2,
want: "/sys/fs/cgroup",
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
r := strings.NewReader(tc.contents)
read := func(fd int, b []byte) (int, uintptr) {
n, err := r.Read(b)
if err != nil && err != io.EOF {
const dummyErrno = 42
return n, dummyErrno
}
return n, 0
}
var got [cgroup.PathSize]byte
var scratch [cgroup.ParseSize]byte
n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:])
n := copy(got[:], tc.cgroup)
n, err := cgroup.ParseCPUMount(0, readString(tc.contents), got[:],
got[:n], tc.version, scratch[:])
if (err != nil) != tc.wantErr {
t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr)
}
@ -374,6 +556,31 @@ func TestParseCPUMount(t *testing.T) {
}
}
// TestParseCPUMountMalformed feeds ParseCPUMount a series of broken
// /proc/self/mountinfo lines and expects ErrMalformedFile for each.
//
// All but the last input are a mountinfo line cut short after successively
// more fields; the last has a root field that does not start with '/'.
// Every case looks up the root cgroup "/" in a v1 hierarchy.
func TestParseCPUMountMalformed(t *testing.T) {
	malformed := []string{
		"\n",
		"22\n",
		"22 1 8:1\n",
		"22 1 8:1 /\n",
		"22 1 8:1 / /cgroup\n",
		"22 1 8:1 / /cgroup rw\n",
		"22 1 8:1 / /cgroup rw -\n",
		"22 1 8:1 / /cgroup rw - \n",
		"22 1 8:1 / /cgroup rw - cgroup\n",
		"22 1 8:1 / /cgroup rw - cgroup cgroup\n",
		"22 1 8:1 a /cgroup rw - cgroup cgroup cpu\n",
	}
	for _, contents := range malformed {
		t.Run("", func(t *testing.T) {
			var (
				out     [cgroup.PathSize]byte
				scratch [cgroup.ParseSize]byte
			)
			cg := []byte("/")
			n, err := cgroup.ParseCPUMount(0, readString(contents), out[:], cg, cgroup.V1, scratch[:])
			if err == cgroup.ErrMalformedFile {
				return
			}
			t.Errorf("parseCPUMount got %q, %v, want ErrMalformedFile", string(out[:n]), err)
		})
	}
}
// escapePath performs escaping equivalent to Linux's show_path.
//
// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
@ -453,9 +660,7 @@ b/c`,
t.Run("unescapePath", func(t *testing.T) {
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
in := []byte(tc.escaped)
out := make([]byte, len(in))
runTest := func(in, out []byte) {
n, err := cgroup.UnescapePath(out, in)
if err != nil {
t.Errorf("unescapePath got err %v want nil", err)
@ -464,6 +669,15 @@ b/c`,
if got != tc.unescaped {
t.Errorf("unescapePath got %q want %q", got, tc.escaped)
}
}
t.Run(tc.name, func(t *testing.T) {
in := []byte(tc.escaped)
out := make([]byte, len(in))
runTest(in, out)
})
t.Run("inplace/"+tc.name, func(t *testing.T) {
in := []byte(tc.escaped)
runTest(in, in)
})
}
})

View file

@ -21,6 +21,7 @@ func NewLineReader(fd int, scratch []byte, read func(fd int, b []byte) (int, uin
var (
ErrEOF = errEOF
ErrIncompleteLine = errIncompleteLine
ErrMalformedFile = errMalformedFile
)
var ContainsCPU = containsCPU
@ -28,7 +29,7 @@ var ContainsCPU = containsCPU
var ParseV1Number = parseV1Number
var ParseV2Limit = parseV2Limit
var ParseCPURelativePath = parseCPURelativePath
var ParseCPUCgroup = parseCPUCgroup
var ParseCPUMount = parseCPUMount
var UnescapePath = unescapePath