Skip to content

Commit

Permalink
Merge branch 'main' into fix-cgroup-path
Browse files Browse the repository at this point in the history
Signed-off-by: Tianyang Zhang <burning9699@gmail.com>
  • Loading branch information
Burning1020 authored Oct 30, 2023
2 parents e0c48ae + 6aa4d8d commit fd712f2
Show file tree
Hide file tree
Showing 38 changed files with 955 additions and 180 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
This aligns cgroupv2 root usage more closely with cgroupv1 reporting.
Additionally, report root swap usage as sum of swap and memory usage,
aligned with v1 and existing non-root v2 reporting. (#3933)
* Add `swapOnlyUsage` in `MemoryStats`. This field reports swap-only usage.
For cgroupv1, `Usage` and `Failcnt` are set by subtracting memory usage
from memory+swap usage. For cgroupv2, `Usage`, `Limit`, and `MaxUsage`
are set. (#4010)

### Fixed

Expand Down
3 changes: 0 additions & 3 deletions contrib/completions/bash/runc
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,6 @@ _runc_run() {
--no-subreaper
--no-pivot
--no-new-keyring
--no-mount-fallback
"

local options_with_args="
Expand Down Expand Up @@ -568,7 +567,6 @@ _runc_create() {
--help
--no-pivot
--no-new-keyring
--no-mount-fallback
"

local options_with_args="
Expand Down Expand Up @@ -629,7 +627,6 @@ _runc_restore() {
--no-pivot
--auto-dedup
--lazy-pages
--no-mount-fallback
"

local options_with_args="
Expand Down
4 changes: 0 additions & 4 deletions create.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,6 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Name: "preserve-fds",
Usage: "Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total)",
},
cli.BoolFlag{
Name: "no-mount-fallback",
Usage: "Do not fallback when the specific configuration is not applicable (e.g., do not try to remount a bind mount again after the first attempt failed on source filesystems that have nodev, noexec, nosuid, noatime, relatime, strictatime, nodiratime set)",
},
},
Action: func(context *cli.Context) error {
if err := checkArgs(context, 1, exactArgs); err != nil {
Expand Down
1 change: 0 additions & 1 deletion docs/spec-conformance.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ The following features are not implemented yet:

Spec version | Feature | PR
-------------|------------------------------------------|----------------------------------------------------------
v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126)
v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862)
v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859))
v1.1.0 | `.process.ioPriority` | [#3783](https://github.com/opencontainers/runc/pull/3783)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.20

require (
github.com/checkpoint-restore/go-criu/v6 v6.3.0
github.com/cilium/ebpf v0.12.1
github.com/cilium/ebpf v0.12.2
github.com/containerd/console v1.0.3
github.com/coreos/go-systemd/v22 v22.5.0
github.com/cyphar/filepath-securejoin v0.2.4
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/checkpoint-restore/go-criu/v6 v6.3.0 h1:mIdrSO2cPNWQY1truPg6uHLXyKHk3Z5Odx4wjKOASzA=
github.com/checkpoint-restore/go-criu/v6 v6.3.0/go.mod h1:rrRTN/uSwY2X+BPRl/gkulo9gsKOSAeVp9/K2tv7xZI=
github.com/cilium/ebpf v0.12.1 h1:0zxmBZrItv5dgJrSVYHo36yVfJAacE7Sd1xPC3fMl4M=
github.com/cilium/ebpf v0.12.1/go.mod h1:u9H29/Iq+8cy70YqI6p5pfADkFl3vdnV2qXDg5JL0Zo=
github.com/cilium/ebpf v0.12.2 h1:cP3qL4kkl19kr/F+hKqUo9F9pPMVz1oms8C7Qj0AwWk=
github.com/cilium/ebpf v0.12.2/go.mod h1:u9H29/Iq+8cy70YqI6p5pfADkFl3vdnV2qXDg5JL0Zo=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
Expand Down
13 changes: 1 addition & 12 deletions libcontainer/cgroups/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,13 @@ func WriteFile(dir, file, data string) error {
return err
}
defer fd.Close()
if err := retryingWriteFile(fd, data); err != nil {
if _, err := fd.WriteString(data); err != nil {
// Having data in the error message helps in debugging.
return fmt.Errorf("failed to write %q: %w", data, err)
}
return nil
}

// retryingWriteFile writes data to fd, repeating the write for as long as it
// fails with EINTR. Any other outcome of the write, including success (a nil
// error), is returned to the caller unchanged.
func retryingWriteFile(fd *os.File, data string) error {
for {
_, err := fd.Write([]byte(data))
if errors.Is(err, unix.EINTR) {
// The write was interrupted: log it and retry with the full data.
logrus.Infof("interrupted while writing %s to %s", data, fd.Name())
continue
}
return err
}
}

const (
cgroupfsDir = "/sys/fs/cgroup"
cgroupfsPrefix = cgroupfsDir + "/"
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (m *Manager) Set(r *configs.Resources) error {
if path == "" {
// We never created a path for this cgroup, so we cannot set
// limits for it (though we have already tried at this point).
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup, and the error is %w", sys.Name(), err)
return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name())
}
return err
}
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/fs/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,10 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
return err
}
stats.MemoryStats.SwapUsage = swapUsage
stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{
Usage: swapUsage.Usage - memoryUsage.Usage,
Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt,
}
kernelUsage, err := getMemoryData(path, "kmem")
if err != nil {
return err
Expand Down
13 changes: 7 additions & 6 deletions libcontainer/cgroups/fs/memory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,13 @@ func TestMemoryStats(t *testing.T) {
t.Fatal(err)
}
expectedStats := cgroups.MemoryStats{
Cache: 512,
Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
Stats: map[string]uint64{"cache": 512, "rss": 1024},
UseHierarchy: true,
Cache: 512,
Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
SwapOnlyUsage: cgroups.MemoryData{Usage: 0, MaxUsage: 0, Failcnt: 0, Limit: 0},
KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192},
Stats: map[string]uint64{"cache": 512, "rss": 1024},
UseHierarchy: true,
PageUsageByNUMA: cgroups.PageUsageByNUMA{
PageUsageByNUMAInner: cgroups.PageUsageByNUMAInner{
Total: cgroups.PageStats{Total: 44611, Nodes: map[uint8]uint64{0: 32631, 1: 7501, 2: 1982, 3: 2497}},
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// misc (since kernel 5.13)
if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
if len(errs) > 0 && !m.config.Rootless {
return st, fmt.Errorf("error while statting cgroup v2: %+v", errs)
}
Expand Down
18 changes: 16 additions & 2 deletions libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,25 +105,30 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {
memoryUsage, err := getMemoryDataV2(dirPath, "")
if err != nil {
if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint {
// The root cgroup does not have memory.{current,max}
// The root cgroup does not have memory.{current,max,peak}
// so emulate those using data from /proc/meminfo and
// /sys/fs/cgroup/memory.stat
return rootStatsFromMeminfo(stats)
}
return err
}
stats.MemoryStats.Usage = memoryUsage
swapUsage, err := getMemoryDataV2(dirPath, "swap")
swapOnlyUsage, err := getMemoryDataV2(dirPath, "swap")
if err != nil {
return err
}
stats.MemoryStats.SwapOnlyUsage = swapOnlyUsage
swapUsage := swapOnlyUsage
// As cgroup v1 reports SwapUsage values as mem+swap combined,
// while in cgroup v2 swap values do not include memory,
// report combined mem+swap for v1 compatibility.
swapUsage.Usage += memoryUsage.Usage
if swapUsage.Limit != math.MaxUint64 {
swapUsage.Limit += memoryUsage.Limit
}
// The `MaxUsage` of mem+swap cannot simply combine mem with
// swap. So set it to 0 for v1 compatibility.
swapUsage.MaxUsage = 0
stats.MemoryStats.SwapUsage = swapUsage

return nil
Expand All @@ -138,6 +143,7 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
}
usage := moduleName + ".current"
limit := moduleName + ".max"
maxUsage := moduleName + ".peak"

value, err := fscommon.GetCgroupParamUint(path, usage)
if err != nil {
Expand All @@ -157,6 +163,14 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) {
}
memoryData.Limit = value

// `memory.peak` since kernel 5.19
// `memory.swap.peak` since kernel 6.5
value, err = fscommon.GetCgroupParamUint(path, maxUsage)
if err != nil && !os.IsNotExist(err) {
return cgroups.MemoryData{}, err
}
memoryData.MaxUsage = value

return memoryData, nil
}

Expand Down
16 changes: 16 additions & 0 deletions libcontainer/cgroups/fs2/memory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ func TestStatMemoryPodCgroup(t *testing.T) {
t.Fatal(err)
}

if err := os.WriteFile(filepath.Join(fakeCgroupDir, "memory.peak"), []byte("987654321"), 0o644); err != nil {
t.Fatal(err)
}

gotStats := cgroups.NewStats()

// use a fake root path to trigger the pod cgroup lookup.
Expand All @@ -107,6 +111,18 @@ func TestStatMemoryPodCgroup(t *testing.T) {
if gotStats.MemoryStats.Usage.Usage != expectedUsageBytes {
t.Errorf("parsed cgroupv2 memory.stat doesn't match expected result: \ngot %#v\nexpected %#v\n", gotStats.MemoryStats.Usage.Usage, expectedUsageBytes)
}

// result should be "memory.max"
var expectedLimitBytes uint64 = 999999999
if gotStats.MemoryStats.Usage.Limit != expectedLimitBytes {
t.Errorf("parsed cgroupv2 memory.stat doesn't match expected result: \ngot %#v\nexpected %#v\n", gotStats.MemoryStats.Usage.Limit, expectedLimitBytes)
}

// result should be "memory.peak"
var expectedMaxUsageBytes uint64 = 987654321
if gotStats.MemoryStats.Usage.MaxUsage != expectedMaxUsageBytes {
t.Errorf("parsed cgroupv2 memory.stat doesn't match expected result: \ngot %#v\nexpected %#v\n", gotStats.MemoryStats.Usage.MaxUsage, expectedMaxUsageBytes)
}
}

func TestRootStatsFromMeminfo(t *testing.T) {
Expand Down
52 changes: 52 additions & 0 deletions libcontainer/cgroups/fs2/misc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package fs2

import (
"bufio"
"os"
"strings"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
)

// statMisc fills stats.MiscStats from the misc.current and misc.events files
// in the cgroup directory dirPath.
//
// Both files are parsed line by line as key/value pairs. Values read from
// misc.current are stored in the Usage field and values read from misc.events
// in the Events field of the entry for that key. A trailing ".max" is removed
// from every key, so that a "<res>.max" line in misc.events lands in the same
// entry as the "<res>" line in misc.current.
//
// The first error encountered (opening a file, parsing a line, or scanning)
// is returned as is; entries stored before the error are left in place.
func statMisc(dirPath string, stats *cgroups.Stats) error {
	// Writing to a nil map panics, so tolerate a Stats value whose MiscStats
	// map has not been initialized by the caller.
	if stats.MiscStats == nil {
		stats.MiscStats = make(map[string]cgroups.MiscStats)
	}

	for _, file := range []string{"current", "events"} {
		if err := statMiscFile(dirPath, file, stats); err != nil {
			return err
		}
	}

	return nil
}

// statMiscFile parses the single file "misc.<file>" under dirPath into
// stats.MiscStats. It expects stats.MiscStats to be non-nil.
func statMiscFile(dirPath, file string, stats *cgroups.Stats) error {
	fd, err := cgroups.OpenFile(dirPath, "misc."+file, os.O_RDONLY)
	if err != nil {
		return err
	}
	// A single deferred close covers every return path below.
	defer fd.Close()

	s := bufio.NewScanner(fd)
	for s.Scan() {
		key, value, err := fscommon.ParseKeyValue(s.Text())
		if err != nil {
			return err
		}

		key = strings.TrimSuffix(key, ".max")

		// Indexing a missing key yields the zero value, so no separate
		// presence check is needed before the read-modify-write.
		entry := stats.MiscStats[key]

		switch file {
		case "current":
			entry.Usage = value
		case "events":
			entry.Events = value
		}

		stats.MiscStats[key] = entry
	}

	return s.Err()
}
103 changes: 103 additions & 0 deletions libcontainer/cgroups/fs2/misc_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package fs2

import (
"os"
"path/filepath"
"strings"
"testing"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// exampleMiscCurrentData is the fixture the tests below write to a fake
// misc.current file: one "<resource> <value>" pair per line.
const exampleMiscCurrentData = `res_a 123
res_b 456
res_c 42`

// exampleMiscEventsData is the fixture the tests below write to a fake
// misc.events file: one "<resource>.max <value>" pair per line, for the same
// resources as exampleMiscCurrentData.
const exampleMiscEventsData = `res_a.max 1
res_b.max 2
res_c.max 3`

// TestStatMiscPodCgroupEmpty checks that statMisc succeeds and reports no
// resources when misc.current and misc.events both exist but are empty.
func TestStatMiscPodCgroupEmpty(t *testing.T) {
	// We're using a fake cgroupfs.
	cgroups.TestMode = true
	fakeCgroupDir := t.TempDir()

	// Create empty misc.current and misc.events files to test the common case
	// where no misc resource keys are available. os.WriteFile closes the file
	// itself, so no descriptor is left open for the rest of the test.
	for _, file := range []string{"misc.current", "misc.events"} {
		if err := os.WriteFile(filepath.Join(fakeCgroupDir, file), nil, 0o644); err != nil {
			t.Fatal(err)
		}
	}

	gotStats := cgroups.NewStats()

	err := statMisc(fakeCgroupDir, gotStats)
	if err != nil {
		t.Errorf("expected no error when statting empty misc.current/misc.events for cgroupv2, but got %#v", err)
	}

	if len(gotStats.MiscStats) != 0 {
		t.Errorf("parsed cgroupv2 misc.* returns unexpected resources: got %#v but expected nothing", gotStats.MiscStats)
	}
}

// TestStatMiscPodCgroupNotFound checks that statMisc returns an error naming
// misc.events when only misc.current is present in the cgroup directory.
func TestStatMiscPodCgroupNotFound(t *testing.T) {
	// We're using a fake cgroupfs.
	cgroups.TestMode = true
	fakeCgroupDir := t.TempDir()

	// Only write misc.current, so that reading misc.current succeeds and the
	// subsequent attempt to open misc.events is the step that fails.
	statPath := filepath.Join(fakeCgroupDir, "misc.current")
	if err := os.WriteFile(statPath, []byte(exampleMiscCurrentData), 0o644); err != nil {
		t.Fatal(err)
	}

	gotStats := cgroups.NewStats()

	err := statMisc(fakeCgroupDir, gotStats)
	if err == nil {
		// Stop here: the check below calls err.Error(), which would panic on
		// a nil error instead of producing a readable test failure.
		t.Fatal("expected error when statting misc.current for cgroupv2 root, but was nil")
	}

	if !strings.Contains(err.Error(), "misc.events: no such file or directory") {
		t.Errorf("expected error to contain 'misc.events: no such file or directory', but was %s", err.Error())
	}
}

// TestStatMiscPodCgroup checks that statMisc merges misc.current and
// misc.events into one entry per resource: Usage comes from misc.current and
// Events from the matching "<resource>.max" line of misc.events.
func TestStatMiscPodCgroup(t *testing.T) {
	// We're using a fake cgroupfs.
	cgroups.TestMode = true
	fakeCgroupDir := t.TempDir()

	currentPath := filepath.Join(fakeCgroupDir, "misc.current")
	if err := os.WriteFile(currentPath, []byte(exampleMiscCurrentData), 0o644); err != nil {
		t.Fatal(err)
	}

	eventsPath := filepath.Join(fakeCgroupDir, "misc.events")
	if err := os.WriteFile(eventsPath, []byte(exampleMiscEventsData), 0o644); err != nil {
		t.Fatal(err)
	}

	gotStats := cgroups.NewStats()

	err := statMisc(fakeCgroupDir, gotStats)
	if err != nil {
		t.Errorf("expected no error when statting misc for cgroupv2 root, but got %#+v", err)
	}

	// make sure all res_* from exampleMisc*Data are returned
	if len(gotStats.MiscStats) != 3 {
		t.Errorf("parsed cgroupv2 misc doesn't return all expected resources: \ngot %#v\nexpected %#v\n", len(gotStats.MiscStats), 3)
	}

	var expectedUsageBytes uint64 = 42
	if gotStats.MiscStats["res_c"].Usage != expectedUsageBytes {
		t.Errorf("parsed cgroupv2 misc.current for res_c doesn't match expected result: \ngot %#v\nexpected %#v\n", gotStats.MiscStats["res_c"].Usage, expectedUsageBytes)
	}

	// Also verify every resource from both fixtures, so that misc.events
	// parsing and the ".max" key suffix trimming are covered as well.
	expected := map[string]struct{ usage, events uint64 }{
		"res_a": {usage: 123, events: 1},
		"res_b": {usage: 456, events: 2},
		"res_c": {usage: 42, events: 3},
	}
	for name, want := range expected {
		got, ok := gotStats.MiscStats[name]
		if !ok {
			t.Errorf("parsed cgroupv2 misc is missing resource %q", name)
			continue
		}
		if got.Usage != want.usage {
			t.Errorf("parsed cgroupv2 misc.current for %s doesn't match expected result: \ngot %#v\nexpected %#v\n", name, got.Usage, want.usage)
		}
		if got.Events != want.events {
			t.Errorf("parsed cgroupv2 misc.events for %s doesn't match expected result: \ngot %#v\nexpected %#v\n", name, got.Events, want.events)
		}
	}
}
Loading

0 comments on commit fd712f2

Please sign in to comment.