internal/runtime/cgroup: CPU cgroup limit discovery

For #73193.

Change-Id: I6a6a636ca9fa9cba429cf053468c56c2939cb1ac
Reviewed-on: https://go-review.googlesource.com/c/go/+/668638
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Michael Pratt 2025-04-22 10:24:37 +00:00
parent 06450a82b0
commit f12c66fbed
5 changed files with 1220 additions and 0 deletions

View file

@ -0,0 +1,710 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cgroup
import (
"internal/bytealg"
"internal/runtime/strconv"
"internal/runtime/syscall"
)
var (
	// ErrNoCgroup is returned when the process is not in a CPU cgroup, or
	// when the /proc files used to discover the cgroup do not exist.
	ErrNoCgroup error = stringError("not in a cgroup")

	// errMalformedFile is returned when a cgroup or /proc file does not
	// have the expected format.
	errMalformedFile error = stringError("malformed file")
)
// _PATH_MAX is the maximum path length used to size the buffers below.
// NOTE(review): presumably mirrors Linux's PATH_MAX — confirm.
const _PATH_MAX = 4096

const (
	// Required amount of scratch space for OpenCPU.
	//
	// TODO(prattmic): This is shockingly large (~70KiB) due to the (very
	// unlikely) combination of extremely long paths consisting mostly of
	// escaped characters. The scratch buffer ends up in .bss in package
	// runtime, so it doesn't contribute to binary size and generally won't
	// be faulted in, but it would still be nice to shrink this. A more
	// complex parser that did not need to keep entire lines in memory
	// could get away with much less. Alternatively, we could do a one-off
	// mmap allocation for this buffer, which is only mapped larger if we
	// actually need the extra space.
	ScratchSize = PathSize + ParseSize

	// Required space to store a path of the cgroup in the filesystem.
	PathSize = _PATH_MAX

	// /proc/self/mountinfo path escape sequences are 4 characters long, so
	// a path consisting entirely of escaped characters could be 4 times
	// larger.
	escapedPathMax = 4 * _PATH_MAX

	// Required space to parse /proc/self/mountinfo and /proc/self/cgroup.
	// See parseCPUMount and parseCPURelativePath.
	ParseSize = 4 * escapedPathMax
)
// Names of the control files holding the CPU limit, relative to the cgroup
// directory. OpenCPU appends them to the cgroup path and passes a pointer to
// the first byte of the result to the open syscall.
//
// Include explicit NUL to be sure we include it in the slice.
const (
	v2MaxFile    = "/cpu.max\x00"
	v1QuotaFile  = "/cpu.cfs_quota_us\x00"
	v1PeriodFile = "/cpu.cfs_period_us\x00"
)
// Version indicates the cgroup version.
type Version int

const (
	// VersionUnknown is the zero Version. It does not identify a cgroup
	// version; functions in this package return it alongside an error.
	VersionUnknown Version = iota
	// V1 is cgroup v1.
	V1
	// V2 is cgroup v2.
	V2
)
// CPU owns the FDs required to read the CPU limit from a cgroup.
//
// A CPU is created by OpenCPU and its FDs are released by Close.
type CPU struct {
	// version is the cgroup version the FDs below belong to (V1 or V2).
	version Version

	// For cgroup v1, this is cpu.cfs_quota_us.
	// For cgroup v2, this is cpu.max.
	quotaFD int

	// For cgroup v1, this is cpu.cfs_period_us.
	// For cgroup v2, this is unused (OpenCPU sets it to -1).
	periodFD int
}
// Close closes the FDs owned by c.
//
// It throws if c does not hold a valid cgroup version (V1 or V2), which is
// the case for the zero CPU.
func (c CPU) Close() {
	if c.version != V1 && c.version != V2 {
		throw("impossible cgroup version")
	}

	// Both versions own quotaFD; only v1 has a separate period file.
	syscall.Close(c.quotaFD)
	if c.version == V1 {
		syscall.Close(c.periodFD)
	}
}
// checkBufferSize throws unless s has exactly size bytes. The actual and
// wanted lengths are printed before throwing.
func checkBufferSize(s []byte, size int) {
	if len(s) == size {
		return
	}
	println("runtime: cgroup buffer length", len(s), "want", size)
	throw("runtime: cgroup invalid buffer length")
}
// OpenCPU returns a CPU for the CPU cgroup containing the current process, or
// ErrNoCgroup if the process is not in a CPU cgroup.
//
// scratch must have length ScratchSize. The first PathSize bytes are used to
// build the paths of the cgroup control files, the rest is parse space for
// FindCPU.
//
// On success the caller owns the returned CPU and must Close it. On error no
// file descriptors are left open.
func OpenCPU(scratch []byte) (CPU, error) {
	checkBufferSize(scratch, ScratchSize)

	base := scratch[:PathSize]
	scratch2 := scratch[PathSize:]

	n, version, err := FindCPU(base, scratch2)
	if err != nil {
		return CPU{}, err
	}

	// Opening a control file may fail if this process was migrated out of
	// the cgroup found by FindCPU and that cgroup has been deleted.
	switch version {
	case V1:
		quotaFD, ok := openCPUFile(base, n, v1QuotaFile)
		if !ok {
			return CPU{}, errSyscallFailed
		}

		periodFD, ok := openCPUFile(base, n, v1PeriodFile)
		if !ok {
			// Don't leak the quota FD we already opened.
			syscall.Close(quotaFD)
			return CPU{}, errSyscallFailed
		}

		c := CPU{
			version:  V1,
			quotaFD:  quotaFD,
			periodFD: periodFD,
		}
		return c, nil
	case V2:
		maxFD, ok := openCPUFile(base, n, v2MaxFile)
		if !ok {
			return CPU{}, errSyscallFailed
		}

		c := CPU{
			version:  V2,
			quotaFD:  maxFD,
			periodFD: -1,
		}
		return c, nil
	default:
		throw("impossible cgroup version")
		panic("unreachable")
	}
}

// openCPUFile appends name, which must include its trailing NUL, to the
// cgroup directory path held in base[:n] and opens the result read-only.
//
// Returns the file descriptor, or ok false if the full path does not fit in
// base or the open fails.
func openCPUFile(base []byte, n int, name string) (int, bool) {
	if len(name) > len(base)-n {
		// copy would silently truncate name and drop the NUL
		// terminator, so the kernel would read a different path.
		return -1, false
	}
	n2 := copy(base[n:], name)
	path := base[:n+n2]
	fd, errno := syscall.Open(&path[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
	if errno != 0 {
		return -1, false
	}
	return fd, true
}
// ReadCPULimit returns the average CPU throughput limit of the cgroup,
// computed as quota divided by period, or ok false if there is no limit.
//
// A read failure is reported as the underlying error for both cgroup
// versions. A limit file with a non-positive period is reported as
// errMalformedFile rather than producing an infinite or NaN limit.
//
// It throws if c does not hold a valid cgroup version.
func ReadCPULimit(c CPU) (float64, bool, error) {
	switch c.version {
	case V1:
		quota, err := readV1Number(c.quotaFD)
		if err != nil {
			return 0, false, err
		}

		if quota < 0 {
			// No limit.
			return 0, false, nil
		}

		period, err := readV1Number(c.periodFD)
		if err != nil {
			return 0, false, err
		}
		if period <= 0 {
			// Dividing by this would not yield a usable limit.
			return 0, false, errMalformedFile
		}

		return float64(quota) / float64(period), true, nil
	case V2:
		// quotaFD is the cpu.max FD.
		return readV2Limit(c.quotaFD)
	default:
		throw("impossible cgroup version")
		panic("unreachable")
	}
}
// readV1Number reads the cgroup v1 quota or period file open on fd and
// returns the number it contains.
//
// Returns errSyscallFailed if the read fails, and errMalformedFile if the
// contents fill the whole read buffer or do not parse.
func readV1Number(fd int) (int64, error) {
	// The file holds "<value>\n", with the value in int64 microseconds.
	// The quota file may hold -1, meaning no limit.
	//
	// MaxInt64 takes 19 decimal digits, so with the newline the file is
	// at most 20 bytes. The buffer is deliberately larger than that; a
	// read that fills it completely cannot be a valid file.
	var raw [64]byte

	// Read at offset 0 every time so that we always see a fresh value.
	size, errno := syscall.Pread(fd, raw[:], 0)
	switch {
	case errno != 0:
		return 0, errSyscallFailed
	case size == len(raw):
		return 0, errMalformedFile
	}
	return parseV1Number(raw[:size])
}
// parseV1Number parses the contents of a cgroup v1 quota or period file.
//
// The number must be terminated by a newline; anything after the first
// newline is ignored. Returns errMalformedFile if the newline is missing or
// the text before it is not an integer.
func parseV1Number(buf []byte) (int64, error) {
	end := bytealg.IndexByte(buf, '\n')
	if end >= 0 {
		if val, ok := strconv.Atoi64(string(buf[:end])); ok {
			return val, nil
		}
	}
	return 0, errMalformedFile
}
// readV2Limit reads the cgroup v2 cpu.max file open on fd and returns the CPU
// throughput limit, or ok false if there is no limit.
//
// Returns errSyscallFailed if the read fails, and errMalformedFile if the
// contents fill the whole read buffer or do not parse.
func readV2Limit(fd int) (float64, bool, error) {
	// The file holds "<quota> <period>\n". Both values are microseconds,
	// and the quota may instead be "max", meaning no limit.
	//
	// The kernel is inconsistent about signedness here: values are parsed
	// as uint64 but printed as int64. See
	// kernel/sched/core.c:cpu_max_{show,write}.
	//
	// In practice this does not matter. The kernel caps the period at 1s
	// (1000000us, see max_cfs_quota_period) and the quota at (1<<44)us
	// (see max_cfs_runtime), so neither value gets large enough to expose
	// the difference.
	//
	// MaxInt64 takes 19 decimal digits, so the file is at most
	// 19 + 1 (space) + 19 + 1 (newline) = 40 bytes. The buffer is
	// deliberately larger than that; a read that fills it completely
	// cannot be a valid file.
	var raw [64]byte

	// Read at offset 0 every time so that we always see a fresh value.
	size, errno := syscall.Pread(fd, raw[:], 0)
	switch {
	case errno != 0:
		return 0, false, errSyscallFailed
	case size == len(raw):
		return 0, false, errMalformedFile
	}
	return parseV2Limit(raw[:size])
}
// parseV2Limit parses the contents of a cgroup v2 cpu.max file, which has the
// form "<quota> <period>\n".
//
// Returns the limit as quota divided by period with ok true, or ok false if
// the quota is "max" (no limit).
//
// Returns errMalformedFile if the space or the trailing newline is missing,
// if either value is not an integer, or if the period is not positive.
func parseV2Limit(buf []byte) (float64, bool, error) {
	i := bytealg.IndexByte(buf, ' ')
	if i < 0 {
		return 0, false, errMalformedFile
	}

	quotaStr := buf[:i]
	// Comparing via a string conversion does not allocate.
	if string(quotaStr) == "max" {
		// No limit.
		return 0, false, nil
	}

	periodStr := buf[i+1:]
	// The period must be terminated by a newline. Anything after the
	// newline is ignored.
	i = bytealg.IndexByte(periodStr, '\n')
	if i < 0 {
		return 0, false, errMalformedFile
	}
	periodStr = periodStr[:i]

	quota, ok := strconv.Atoi64(string(quotaStr))
	if !ok {
		return 0, false, errMalformedFile
	}

	period, ok := strconv.Atoi64(string(periodStr))
	if !ok {
		return 0, false, errMalformedFile
	}
	if period <= 0 {
		// Dividing by this would yield an infinite, NaN or negative
		// limit reported as valid.
		return 0, false, errMalformedFile
	}

	return float64(quota) / float64(period), true, nil
}
// FindCPU locates the CPU cgroup that this process is a member of and writes
// its filesystem path to out. scratch is a scratch buffer for internal use.
//
// out must have length PathSize. scratch must have length ParseSize.
//
// Returns the number of bytes written to out and the cgroup version (1 or 2).
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup.
func FindCPU(out []byte, scratch []byte) (int, Version, error) {
	checkBufferSize(out, PathSize)
	checkBufferSize(scratch, ParseSize)

	// The full path is <cgroup mount point> + <relative path>, found in
	// two separate steps.
	//
	// The two steps can disagree if our cgroup changes while this runs.
	// For example, suppose there is initially only a cgroup v2 mount and
	// we are not in a cgroup, and afterwards there is a cgroup v1 mount
	// with a CPU controller and we are placed in a cgroup of that
	// hierarchy. The mount point step could then pick the v2 mount while
	// the relative path step picks up a path from the other state.
	//
	// When that happens we later fail to read the cgroup files and fall
	// back to assuming no cgroup.
	mountLen, err := FindCPUMountPoint(out, scratch)
	if err != nil {
		return 0, 0, err
	}

	// The relative path always begins with /, so it can be written
	// directly after the mount point.
	relLen, version, err := FindCPURelativePath(out[mountLen:], scratch)
	if err != nil {
		return 0, 0, err
	}

	return mountLen + relLen, version, nil
}
// FindCPURelativePath reads /proc/self/cgroup to find the CPU cgroup that
// this process is a member of, and writes the cgroup's path relative to the
// root of the cgroup mount to out. scratch is a scratch buffer for internal
// use.
//
// out must have length PathSize minus the size of the cgroup mount root (if
// known). scratch must have length ParseSize.
//
// Returns the number of bytes written to out and the cgroup version (1 or 2).
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup, or if
// /proc/self/cgroup does not exist.
func FindCPURelativePath(out []byte, scratch []byte) (int, Version, error) {
	name := []byte("/proc/self/cgroup\x00")
	fd, errno := syscall.Open(&name[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
	switch {
	case errno == syscall.ENOENT:
		return 0, 0, ErrNoCgroup
	case errno != 0:
		return 0, 0, errSyscallFailed
	}

	// The file is no longer needed once parsing finishes, whether or not
	// it succeeded.
	n, version, err := parseCPURelativePath(fd, syscall.Read, out[:], scratch)
	syscall.Close(fd)
	if err != nil {
		return 0, 0, err
	}
	return n, version, nil
}
// parseCPURelativePath scans the contents of /proc/self/cgroup for the
// current process's CPU cgroup and writes the cgroup's path, relative to the
// cgroup mount, to out.
//
// The contents are obtained by calling read with fd. scratch is handed to the
// line reader as its buffer.
//
// Returns the number of bytes written and the cgroup version (1 or 2), or
// ErrNoCgroup if no CPU cgroup is listed.
func parseCPURelativePath(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, Version, error) {
	// Every line has the form
	//
	//   hierarchy-ID:controller-list:cgroup-path
	//
	// where controller-list is comma-separated. See man 5 cgroup for
	// more details.
	//
	// The cgroup v2 hierarchy has hierarchy-ID 0. A v1 hierarchy whose
	// controller list contains "cpu" is the CPU controller. If there is
	// none, the v2 hierarchy (if any) is the CPU controller.
	//
	// hierarchy-ID and controller-list have relatively small maximum
	// sizes and the path can be up to _PATH_MAX, so a bit more than one
	// _PATH_MAX of scratch space is needed.
	lines := newLineReader(fd, scratch, read)

	// Length of the v2 path written to out, if a v2 line has been seen.
	v2Len := 0

	for {
		err := lines.next()
		if err == errEOF {
			break
		}
		if err != nil {
			// This includes errIncompleteLine. In theory an
			// incomplete line could belong to a controller we
			// don't care about, but in practice all lines are of
			// similar length, so the buffer should simply be big
			// enough for any of them.
			return 0, 0, err
		}

		// Split off hierarchy-ID and controller-list; whatever
		// remains is the cgroup path.
		rest := lines.line()

		sep := bytealg.IndexByte(rest, ':')
		if sep < 0 {
			return 0, 0, errMalformedFile
		}
		hierarchy := rest[:sep]
		rest = rest[sep+1:]

		sep = bytealg.IndexByte(rest, ':')
		if sep < 0 {
			return 0, 0, errMalformedFile
		}
		controllers, path := rest[:sep], rest[sep+1:]

		if string(hierarchy) == "0" {
			// v2 hierarchy. Remember it, but keep searching: a v1
			// hierarchy with a CPU controller takes precedence.
			v2Len = copy(out, path)
			continue
		}

		// v1 hierarchy. If it has the CPU controller it must be the
		// only one that does, so we're done.
		if containsCPU(controllers) {
			return copy(out, path), V1, nil
		}
	}

	if v2Len == 0 {
		// Found nothing.
		return 0, 0, ErrNoCgroup
	}

	// Must be v2, v1 returns from inside the loop.
	return v2Len, V2, nil
}
// containsCPU reports whether the comma-separated list b has an element that
// is exactly "cpu".
func containsCPU(b []byte) bool {
	for rest := b; len(rest) > 0; {
		comma := bytealg.IndexByte(rest, ',')
		if comma < 0 {
			// Last element. Neither cmd/compile nor gccgo
			// allocates for these string conversions.
			return string(rest) == "cpu"
		}
		if string(rest[:comma]) == "cpu" {
			return true
		}
		rest = rest[comma+1:]
	}
	return false
}
// FindCPUMountPoint reads /proc/self/mountinfo to find the root of the CPU
// cgroup mount and writes it to out. scratch is a scratch buffer for internal
// use.
//
// out must have length PathSize. scratch must have length ParseSize.
//
// Returns the number of bytes written to out.
//
// Returns ErrNoCgroup if the process is not in a CPU cgroup, or if
// /proc/self/mountinfo does not exist.
func FindCPUMountPoint(out []byte, scratch []byte) (int, error) {
	checkBufferSize(out, PathSize)
	checkBufferSize(scratch, ParseSize)

	name := []byte("/proc/self/mountinfo\x00")
	fd, errno := syscall.Open(&name[0], syscall.O_RDONLY|syscall.O_CLOEXEC, 0)
	switch {
	case errno == syscall.ENOENT:
		return 0, ErrNoCgroup
	case errno != 0:
		return 0, errSyscallFailed
	}

	// The file is no longer needed once parsing finishes, whether or not
	// it succeeded.
	n, err := parseCPUMount(fd, syscall.Read, out, scratch)
	syscall.Close(fd)
	if err != nil {
		return 0, err
	}
	return n, nil
}
// parseCPUMount finds the mount point of the cpu cgroup controller (v1 or v2)
// in the contents of /proc/self/mountinfo and writes the unescaped path to
// out.
//
// The contents are obtained by calling read with fd. scratch is handed to the
// line reader as its buffer.
//
// Returns the number of bytes written to out, or ErrNoCgroup if no suitable
// mount is found.
func parseCPUMount(fd int, read func(fd int, b []byte) (int, uintptr), out []byte, scratch []byte) (int, error) {
	// The format of each line is:
	//
	// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
	// (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
	//
	// (1) mount ID: unique identifier of the mount (may be reused after umount)
	// (2) parent ID: ID of parent (or of self for the top of the mount tree)
	// (3) major:minor: value of st_dev for files on filesystem
	// (4) root: root of the mount within the filesystem
	// (5) mount point: mount point relative to the process's root
	// (6) mount options: per mount options
	// (7) optional fields: zero or more fields of the form "tag[:value]"
	// (8) separator: marks the end of the optional fields
	// (9) filesystem type: name of filesystem of the form "type[.subtype]"
	// (10) mount source: filesystem specific information or "none"
	// (11) super options: per super block options
	//
	// See man 5 proc_pid_mountinfo for more details.
	//
	// Note that emitted paths will not contain space, tab, newline, or
	// carriage return. Those are escaped. See Linux show_mountinfo ->
	// show_path. We must unescape before returning.
	//
	// We return the mount point (5) if the filesystem type (9) is cgroup2,
	// or cgroup with "cpu" in the super options (11).
	//
	// (4), (5), and (10) are up to _PATH_MAX. The remaining fields have a
	// small fixed maximum size, so 4*_PATH_MAX is plenty of scratch space.
	// Note that non-cgroup mounts may have arbitrarily long (11), but we
	// can skip those when parsing.
	l := newLineReader(fd, scratch, read)

	// Bytes written to out.
	n := 0

	for {
		err := l.next()
		if err == errIncompleteLine {
			// An incomplete line is fine as long as it doesn't
			// impede parsing the fields we need. It shouldn't be
			// possible for any mount to use more than 3*PATH_MAX
			// before (9) because there are two paths and all other
			// earlier fields have bounded options. Only (11) has
			// unbounded options.
		} else if err == errEOF {
			break
		} else if err != nil {
			return 0, err
		}

		line := l.line()

		// Skip first four fields.
		for range 4 {
			i := bytealg.IndexByte(line, ' ')
			if i < 0 {
				return 0, errMalformedFile
			}
			line = line[i+1:]
		}

		// (5) mount point: mount point relative to the process's root
		i := bytealg.IndexByte(line, ' ')
		if i < 0 {
			return 0, errMalformedFile
		}
		mnt := line[:i]
		line = line[i+1:]

		// Skip ahead past optional fields, delimited by " - ".
		//
		// Each iteration looks at the next space: if it starts the
		// " - " separator we are done, otherwise it merely ends one
		// field and we move on to the following one.
		for {
			i = bytealg.IndexByte(line, ' ')
			if i < 0 {
				return 0, errMalformedFile
			}
			// A line ending right at the separator has no
			// filesystem type after it, so it is rejected too.
			if i+3 >= len(line) {
				return 0, errMalformedFile
			}
			delim := line[i : i+3]
			if string(delim) == " - " {
				line = line[i+3:]
				break
			}
			line = line[i+1:]
		}

		// (9) filesystem type: name of filesystem of the form "type[.subtype]"
		i = bytealg.IndexByte(line, ' ')
		if i < 0 {
			return 0, errMalformedFile
		}
		ftype := line[:i]
		line = line[i+1:]

		if string(ftype) != "cgroup" && string(ftype) != "cgroup2" {
			continue
		}

		// As in parseCPURelativePath, cgroup v1 with a CPU controller
		// takes precedence over cgroup v2.
		if string(ftype) == "cgroup2" {
			// v2 hierarchy.
			n, err = unescapePath(out, mnt)
			if err != nil {
				// Don't keep searching on error. The kernel
				// should never produce broken escaping.
				return n, err
			}
			// Keep searching, we might find a v1 hierarchy with a
			// CPU controller, which takes precedence.
			continue
		}

		// (10) mount source: filesystem specific information or "none"
		i = bytealg.IndexByte(line, ' ')
		if i < 0 {
			return 0, errMalformedFile
		}
		// Don't care about mount source.
		line = line[i+1:]

		// (11) super options: per super block options
		superOpt := line

		// v1 hierarchy
		if containsCPU(superOpt) {
			// Found a v1 CPU controller. This must be the
			// only one, so we're done.
			return unescapePath(out, mnt)
		}
	}

	if n == 0 {
		// Found nothing.
		return 0, ErrNoCgroup
	}

	return n, nil
}
// errInvalidEscape is returned by unescapePath when its input contains a
// backslash that does not start a valid three-digit octal escape.
var errInvalidEscape error = stringError("invalid path escape sequence")

// unescapePath copies in to out, unescaping escape sequences generated by
// Linux's show_path.
//
// That is, '\', ' ', '\t', and '\n' are converted to octal escape sequences,
// like '\040' for space.
//
// out must be large enough to hold the unescaped path. An out at least as
// large as in always is, but a smaller one is accepted as long as the result
// fits: an escaped path can be up to four times longer than the path it
// encodes. unescapePath throws if it runs out of space in out.
//
// Returns the number of bytes written to out, and errInvalidEscape if in
// contains a backslash not followed by three octal digits encoding a value
// that fits in a byte.
//
// Also see escapePath in cgroup_linux_test.go.
func unescapePath(out []byte, in []byte) (int, error) {
	var outi, ini int
	for ini < len(in) {
		c := in[ini]
		width := 1
		if c == '\\' {
			// Start of escape sequence.

			// Escape sequence is always 4 characters: one slash and
			// three digits.
			if ini+3 >= len(in) {
				return outi, errInvalidEscape
			}

			d0, d1, d2 := in[ini+1], in[ini+2], in[ini+3]
			// Only octal digits are valid, and the first digit must
			// not exceed 3 or the value would not fit in a byte.
			if d0 < '0' || d0 > '3' || d1 < '0' || d1 > '7' || d2 < '0' || d2 > '7' {
				return outi, errInvalidEscape
			}

			c = (d0-'0')<<6 | (d1-'0')<<3 | (d2 - '0')
			width = 4
		}

		if outi >= len(out) {
			throw("output too small")
		}
		out[outi] = c
		outi++
		ini += width
	}

	return outi, nil
}

View file

@ -0,0 +1,476 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cgroup_test
import (
"fmt"
"internal/runtime/cgroup"
"io"
"strconv"
"strings"
"testing"
)
// _PATH_MAX has the same value as the constant of the same name in package
// cgroup.
// NOTE(review): not referenced by any test visible in this file — confirm it
// is still needed.
const _PATH_MAX = 4096
// TestParseV1Number checks parsing of cgroup v1 quota/period file contents,
// covering valid values and the malformed inputs that must be rejected.
func TestParseV1Number(t *testing.T) {
	type testCase struct {
		name     string
		contents string
		want     int64
		wantErr  bool
	}
	cases := []testCase{
		{name: "disabled", contents: "-1\n", want: -1},
		{name: "500000", contents: "500000\n", want: 500000},
		{name: "MaxInt64", contents: "9223372036854775807\n", want: 9223372036854775807},
		{name: "missing-newline", contents: "500000", wantErr: true},
		{name: "not-a-number", contents: "123max\n", wantErr: true},
		{name: "v2", contents: "1000 5000\n", wantErr: true},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			got, err := cgroup.ParseV1Number([]byte(c.contents))
			switch {
			case c.wantErr && err == nil:
				t.Fatalf("parseV1Number got err nil want non-nil")
			case c.wantErr:
				// Failed as expected; the value is meaningless.
				return
			case err != nil:
				t.Fatalf("parseV1Number got err %v want nil", err)
			case got != c.want:
				t.Errorf("parseV1Number got %d want %d", got, c.want)
			}
		})
	}
}
// TestParseV2Limit checks parsing of cgroup v2 cpu.max contents, covering the
// "max" (no limit) case, valid limits, and the malformed inputs that must be
// rejected.
func TestParseV2Limit(t *testing.T) {
	tests := []struct {
		name     string
		contents string
		want     float64
		wantOK   bool
		wantErr  bool
	}{
		{
			name:     "disabled",
			contents: "max 100000\n",
			wantOK:   false,
		},
		{
			name:     "5",
			contents: "500000 100000\n",
			want:     5,
			wantOK:   true,
		},
		{
			name:     "0.5",
			contents: "50000 100000\n",
			want:     0.5,
			wantOK:   true,
		},
		{
			name:     "2.5",
			contents: "250000 100000\n",
			want:     2.5,
			wantOK:   true,
		},
		{
			name:     "MaxInt64",
			contents: "9223372036854775807 9223372036854775807\n",
			want:     1,
			wantOK:   true,
		},
		{
			name:     "missing-newline",
			contents: "500000 100000",
			wantErr:  true,
		},
		{
			name:     "v1",
			contents: "500000\n",
			wantErr:  true,
		},
		{
			name:     "quota-not-a-number",
			contents: "500000us 100000\n",
			wantErr:  true,
		},
		{
			name:     "period-not-a-number",
			contents: "500000 100000us\n",
			wantErr:  true,
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			got, gotOK, err := cgroup.ParseV2Limit([]byte(tc.contents))
			if tc.wantErr {
				if err == nil {
					t.Fatalf("parseV2Limit got err nil want non-nil")
				}
				return
			}
			if err != nil {
				t.Fatalf("parseV2Limit got err %v want nil", err)
			}

			if gotOK != tc.wantOK {
				t.Errorf("parseV2Limit got ok %v want %v", gotOK, tc.wantOK)
			}

			// The value is only meaningful when a limit is expected.
			if tc.wantOK && got != tc.want {
				t.Errorf("parseV2Limit got %f want %f", got, tc.want)
			}
		})
	}
}
// TestParseCPURelativePath feeds sample /proc/self/cgroup contents to the
// parser and checks the relative path and cgroup version it selects.
func TestParseCPURelativePath(t *testing.T) {
	type testCase struct {
		name     string
		contents string
		want     string
		wantVer  cgroup.Version
		wantErr  bool
	}
	cases := []testCase{
		{
			name:     "empty",
			contents: "",
			wantErr:  true,
		},
		{
			name: "v1",
			contents: "2:cpu,cpuacct:/a/b/cpu\n" +
				"1:blkio:/a/b/blkio\n",
			want:    "/a/b/cpu",
			wantVer: cgroup.V1,
		},
		{
			name:     "v2",
			contents: "0::/a/b/c\n",
			want:     "/a/b/c",
			wantVer:  cgroup.V2,
		},
		{
			name: "mixed",
			contents: "2:cpu,cpuacct:/a/b/cpu\n" +
				"1:blkio:/a/b/blkio\n" +
				"0::/a/b/v2\n",
			want:    "/a/b/cpu",
			wantVer: cgroup.V1,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Serve the contents through a read callback shaped like
			// the syscall the parser uses. EOF is reported as a
			// successful read; any other error becomes a made-up
			// errno.
			src := strings.NewReader(c.contents)
			read := func(fd int, b []byte) (int, uintptr) {
				const dummyErrno = 42
				n, err := src.Read(b)
				if err == nil || err == io.EOF {
					return n, 0
				}
				return n, dummyErrno
			}

			var (
				got     [cgroup.PathSize]byte
				scratch [cgroup.ParseSize]byte
			)
			n, gotVer, err := cgroup.ParseCPURelativePath(0, read, got[:], scratch[:])
			if gotErr := err != nil; gotErr != c.wantErr {
				t.Fatalf("parseCPURelativePath got err %v want %v", err, c.wantErr)
			}

			if gotVer != c.wantVer {
				t.Errorf("parseCPURelativePath got cgroup version %d want %d", gotVer, c.wantVer)
			}

			if path := string(got[:n]); path != c.want {
				t.Errorf("parseCPURelativePath got %q want %q", path, c.want)
			}
		})
	}
}
// TestContainsCPU checks that only an exact "cpu" element of a
// comma-separated list is matched, wherever it appears in the list.
func TestContainsCPU(t *testing.T) {
	type testCase struct {
		in   string
		want bool
	}
	cases := []testCase{
		{in: "", want: false},
		{in: ",", want: false},
		{in: "cpu", want: true},
		{in: "memory,cpu", want: true},
		{in: "cpu,memory", want: true},
		{in: "memory,cpu,block", want: true},
		{in: "memory,cpuacct,block", want: false},
	}

	for _, c := range cases {
		t.Run(c.in, func(t *testing.T) {
			if got := cgroup.ContainsCPU([]byte(c.in)); got != c.want {
				t.Errorf("containsCPU(%q) got %v want %v", c.in, got, c.want)
			}
		})
	}
}
// TestParseCPUMount feeds sample /proc/self/mountinfo contents to the parser
// and checks the mount point it selects.
func TestParseCPUMount(t *testing.T) {
	// Used for v2-longline. We want an overlayfs mount to have an option
	// so long that the entire line can't possibly fit in the scratch
	// buffer.
	const lowerPath = "/so/many/overlay/layers"
	var lowerDirs strings.Builder
	lowerDirs.WriteString(lowerPath)
	for i := 0; lowerDirs.Len() < cgroup.ScratchSize; i++ {
		fmt.Fprintf(&lowerDirs, ":%s%d", lowerPath, i)
	}
	overlayLongLowerDir := lowerDirs.String()

	tests := []struct {
		name     string
		contents string
		want     string
		wantErr  bool
	}{
		{
			name:     "empty",
			contents: "",
			wantErr:  true,
		},
		{
			name: "v1",
			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
			want: "/sys/fs/cgroup/cpu",
		},
		{
			name: "v2",
			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
			want: "/sys/fs/cgroup",
		},
		{
			name: "mixed",
			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
49 22 0:37 / /sys/fs/cgroup/memory rw - cgroup cgroup rw,memory
54 22 0:38 / /sys/fs/cgroup/io rw - cgroup cgroup rw,io
56 22 0:40 / /sys/fs/cgroup/cpu rw - cgroup cgroup rw,cpu,cpuacct
58 22 0:42 / /sys/fs/cgroup/net rw - cgroup cgroup rw,net
59 22 0:43 / /sys/fs/cgroup/cpuset rw - cgroup cgroup rw,cpuset
`,
			want: "/sys/fs/cgroup/cpu",
		},
		{
			name: "v2-escaped",
			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
25 21 0:22 / /sys/fs/cgroup/tab\011tab rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
			// \011 is a tab. Spell it as an escape so that it cannot
			// be confused with, or turned into, a space.
			want: "/sys/fs/cgroup/tab\ttab",
		},
		{
			// Overly long line on a different mount doesn't matter.
			name: "v2-longline",
			contents: `22 1 8:1 / / rw,relatime - ext4 /dev/root rw
20 22 0:19 / /proc rw,nosuid,nodev,noexec - proc proc rw
21 22 0:20 / /sys rw,nosuid,nodev,noexec - sysfs sysfs rw
262 31 0:72 / /tmp/overlay2/0143e063b02f4801de9c847ad1c5ddc21fd2ead00653064d0c72ea967b248870/merged rw,relatime shared:729 - overlay overlay rw,lowerdir=` + overlayLongLowerDir + `,upperdir=/tmp/diff,workdir=/tmp/work
25 21 0:22 / /sys/fs/cgroup rw,nosuid,nodev,noexec - cgroup2 cgroup2 rw
`,
			want: "/sys/fs/cgroup",
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Serve the contents through a read callback shaped like
			// the syscall the parser uses. EOF is reported as a
			// successful read; any other error becomes a made-up
			// errno.
			r := strings.NewReader(tc.contents)
			read := func(fd int, b []byte) (int, uintptr) {
				n, err := r.Read(b)
				if err != nil && err != io.EOF {
					const dummyErrno = 42
					return n, dummyErrno
				}
				return n, 0
			}

			var got [cgroup.PathSize]byte
			var scratch [cgroup.ParseSize]byte
			n, err := cgroup.ParseCPUMount(0, read, got[:], scratch[:])
			if (err != nil) != tc.wantErr {
				t.Fatalf("parseCPUMount got err %v want %v", err, tc.wantErr)
			}

			if string(got[:n]) != tc.want {
				t.Errorf("parseCPUMount got %q want %q", string(got[:n]), tc.want)
			}
		})
	}
}
// escapePath performs escaping equivalent to Linux's show_path.
//
// Each '\', ' ', '\t', and '\n' is replaced by a backslash followed by the
// character's value as three octal digits, like '\040' for space. All other
// characters are copied unchanged.
func escapePath(s string) string {
	escaped := make([]rune, 0, len(s))
	for _, r := range s {
		if r != '\\' && r != ' ' && r != '\t' && r != '\n' {
			escaped = append(escaped, r)
			continue
		}

		octal := strconv.FormatInt(int64(r), 8)
		escaped = append(escaped, '\\')
		// Left-pad with zeros to exactly three digits.
		for width := len(octal); width < 3; width++ {
			escaped = append(escaped, '0')
		}
		escaped = append(escaped, []rune(octal)...)
	}
	return string(escaped)
}
// TestEscapePath checks escapePath and cgroup.UnescapePath against the same
// table: escaping the unescaped form must give the escaped form, and
// unescaping the escaped form must give back the unescaped form.
func TestEscapePath(t *testing.T) {
	tests := []struct {
		name      string
		unescaped string
		escaped   string
	}{
		{
			name:      "boring",
			unescaped: `/a/b/c`,
			escaped:   `/a/b/c`,
		},
		{
			name:      "space",
			unescaped: `/a/b b/c`,
			escaped:   `/a/b\040b/c`,
		},
		{
			// Spelled as an escape so that the tab cannot be
			// confused with, or turned into, a space.
			name:      "tab",
			unescaped: "/a/b\tb/c",
			escaped:   `/a/b\011b/c`,
		},
		{
			name:      "newline",
			unescaped: "/a/b\nb/c",
			escaped:   `/a/b\012b/c`,
		},
		{
			name:      "slash",
			unescaped: `/a/b\b/c`,
			escaped:   `/a/b\134b/c`,
		},
		{
			name:      "beginning",
			unescaped: `\b/c`,
			escaped:   `\134b/c`,
		},
		{
			name:      "ending",
			unescaped: `/a/\`,
			escaped:   `/a/\134`,
		},
	}

	t.Run("escapePath", func(t *testing.T) {
		for _, tc := range tests {
			t.Run(tc.name, func(t *testing.T) {
				got := escapePath(tc.unescaped)
				if got != tc.escaped {
					t.Errorf("escapePath got %q want %q", got, tc.escaped)
				}
			})
		}
	})

	t.Run("unescapePath", func(t *testing.T) {
		for _, tc := range tests {
			t.Run(tc.name, func(t *testing.T) {
				in := []byte(tc.escaped)
				// Unescaping never grows the path, so an output
				// buffer of the same size always suffices.
				out := make([]byte, len(in))
				n, err := cgroup.UnescapePath(out, in)
				if err != nil {
					t.Errorf("unescapePath got err %v want nil", err)
				}
				got := string(out[:n])
				if got != tc.unescaped {
					t.Errorf("unescapePath got %q want %q", got, tc.unescaped)
				}
			})
		}
	})
}

View file

@ -0,0 +1,15 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cgroup
// Exported aliases of unexported functions, for use by the tests in package
// cgroup_test.
var ContainsCPU = containsCPU
var ParseV1Number = parseV1Number
var ParseV2Limit = parseV2Limit
var ParseCPURelativePath = parseCPURelativePath
var ParseCPUMount = parseCPUMount
var UnescapePath = unescapePath

View file

@ -0,0 +1,14 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cgroup
import (
_ "unsafe" // for linkname
)
// Functions below pushed from runtime.

// throw reports a fatal error described by s. It has no body here: the
// implementation is pushed from package runtime via linkname.
//
//go:linkname throw
func throw(s string)

View file

@ -1056,6 +1056,11 @@ func internal_sync_fatal(s string) {
fatal(s)
}
// cgroup_throw is the implementation of internal/runtime/cgroup.throw, pushed
// to that package via linkname. It forwards to the runtime's throw.
//
//go:linkname cgroup_throw internal/runtime/cgroup.throw
func cgroup_throw(s string) {
	throw(s)
}
// throw triggers a fatal error that dumps a stack trace and exits.
//
// throw should be used for runtime-internal fatal errors where Go itself,