diff --git a/libcontainer/cgroups/cgroups.go b/libcontainer/cgroups/cgroups.go index a16a68e97..c418fbae9 100644 --- a/libcontainer/cgroups/cgroups.go +++ b/libcontainer/cgroups/cgroups.go @@ -48,4 +48,7 @@ type Manager interface { // Whether the cgroup path exists or not Exists() bool + + // OOMKillCount reports OOM kill count for the cgroup. + OOMKillCount() (uint64, error) } diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index 98e1e3426..67b887f32 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -11,6 +11,7 @@ import ( "sync" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" "github.com/pkg/errors" @@ -454,3 +455,11 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) { func (m *manager) Exists() bool { return cgroups.PathExists(m.Path("devices")) } + +func OOMKillCount(path string) (uint64, error) { + return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") +} + +func (m *manager) OOMKillCount() (uint64, error) { + return OOMKillCount(m.Path("memory")) +} diff --git a/libcontainer/cgroups/fs/memory.go b/libcontainer/cgroups/fs/memory.go index 41adcd38f..4bac6754b 100644 --- a/libcontainer/cgroups/fs/memory.go +++ b/libcontainer/cgroups/fs/memory.go @@ -15,6 +15,8 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" + "github.com/pkg/errors" + "golang.org/x/sys/unix" ) const ( @@ -28,6 +30,8 @@ const ( cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" cgroupMemoryLimit = "memory.limit_in_bytes" cgroupMemoryPagesByNuma = "memory.numa_stat" + cgroupMemoryUsage = "memory.usage_in_bytes" + cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" ) type MemoryGroup struct { @@ -66,20 +70,52 @@ func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) { return join(path, d.pid) } -func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { +func setMemory(path string, val int64) error { + if val == 0 { + return nil + } + + err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) + if !errors.Is(err, unix.EBUSY) { + return err + } + + // EBUSY means the kernel can't set new limit as it's too low + // (lower than the current usage). Return more specific error. + usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage) + if err != nil { + return err + } + max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage) + if err != nil { + return err + } + + return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max) +} + +func setSwap(path string, val int64) error { + if val == 0 { + return nil + } + + return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) +} + +func setMemoryAndSwap(path string, r *configs.Resources) error { // If the memory update is set to -1 and the swap is not explicitly // set, we should also set swap to -1, it means unlimited memory. - if cgroup.Resources.Memory == -1 && cgroup.Resources.MemorySwap == 0 { + if r.Memory == -1 && r.MemorySwap == 0 { // Only set swap if it's enabled in kernel if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { - cgroup.Resources.MemorySwap = -1 + r.MemorySwap = -1 } } // When memory and swap memory are both set, we need to handle the cases // for updating container. - if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 { - memoryUsage, err := getMemoryData(path, "") + if r.Memory != 0 && r.MemorySwap != 0 { + curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit) if err != nil { return err } @@ -87,39 +123,29 @@ func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error { // When update memory limit, we should adapt the write sequence // for memory and swap memory, so it won't fail because the new // value and the old value don't fit kernel's validation. - if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) { - if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { - return err - } - if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { - return err - } - } else { - if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { - return err - } - if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { - return err - } - } - } else { - if cgroup.Resources.Memory != 0 { - if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil { + if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) { + if err := setSwap(path, r.MemorySwap); err != nil { return err } - } - if cgroup.Resources.MemorySwap != 0 { - if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil { + if err := setMemory(path, r.Memory); err != nil { return err } + return nil } } + if err := setMemory(path, r.Memory); err != nil { + return err + } + if err := setSwap(path, r.MemorySwap); err != nil { + return err + } + return nil } func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error { - if err := setMemoryAndSwap(path, cgroup); err != nil { + if err := setMemoryAndSwap(path, cgroup.Resources); err != nil { return err } diff --git a/libcontainer/cgroups/fs/memory_test.go b/libcontainer/cgroups/fs/memory_test.go index 52244fc7b..a1727c547 100644 --- a/libcontainer/cgroups/fs/memory_test.go +++ b/libcontainer/cgroups/fs/memory_test.go @@ -158,11 +158,6 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) { helper.writeFileContents(map[string]string{ "memory.limit_in_bytes": strconv.Itoa(memoryBefore), "memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore), - // Set will call getMemoryData when memory and swap memory are - // both set, fake these fields so we don't get error. - "memory.usage_in_bytes": "0", - "memory.max_usage_in_bytes": "0", - "memory.failcnt": "0", }) helper.CgroupData.config.Resources.Memory = memoryAfter @@ -177,14 +172,14 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) { t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err) } if value != memoryAfter { - t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.") + t.Fatalf("Got the wrong value (%d != %d), set memory.limit_in_bytes failed", value, memoryAfter) } value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes") if err != nil { t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err) } if value != memoryswapAfter { - t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.") + t.Fatalf("Got the wrong value (%d != %d), set memory.memsw.limit_in_bytes failed", value, memoryswapAfter) } } diff --git a/libcontainer/cgroups/fs2/fs2.go b/libcontainer/cgroups/fs2/fs2.go index 0975064f2..a3a394bb8 100644 --- a/libcontainer/cgroups/fs2/fs2.go +++ b/libcontainer/cgroups/fs2/fs2.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" "github.com/pkg/errors" ) @@ -227,3 +228,11 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) { func (m *manager) Exists() bool { return cgroups.PathExists(m.dirPath) } + +func OOMKillCount(path string) (uint64, error) { + return fscommon.GetValueByKey(path, "memory.events", "oom_kill") +} + +func (m *manager) OOMKillCount() (uint64, error) { + return OOMKillCount(m.dirPath) +} diff --git a/libcontainer/cgroups/fscommon/utils.go b/libcontainer/cgroups/fscommon/utils.go index 46c3c7799..6a53a50ce 100644 --- a/libcontainer/cgroups/fscommon/utils.go +++ b/libcontainer/cgroups/fscommon/utils.go @@ -53,6 +53,26 @@ func GetCgroupParamKeyValue(t string) (string, uint64, error) { } } +// GetValueByKey reads a key-value pairs from the specified cgroup file, +// and returns a value of the specified key. ParseUint is used for value +// conversion. +func GetValueByKey(path, file, key string) (uint64, error) { + content, err := ioutil.ReadFile(filepath.Join(path, file)) + if err != nil { + return 0, err + } + + lines := strings.Split(string(content), "\n") + for _, line := range lines { + arr := strings.Split(line, " ") + if len(arr) == 2 && arr[0] == key { + return ParseUint(arr[1], 10, 64) + } + } + + return 0, nil +} + // Gets a single uint64 value from the specified cgroup file. func GetCgroupParamUint(cgroupPath, cgroupFile string) (uint64, error) { fileName := filepath.Join(cgroupPath, cgroupFile) diff --git a/libcontainer/cgroups/systemd/v1.go b/libcontainer/cgroups/systemd/v1.go index cc2d19cd2..4a7ae8a93 100644 --- a/libcontainer/cgroups/systemd/v1.go +++ b/libcontainer/cgroups/systemd/v1.go @@ -440,3 +440,7 @@ func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) { func (m *legacyManager) Exists() bool { return cgroups.PathExists(m.Path("devices")) } + +func (m *legacyManager) OOMKillCount() (uint64, error) { + return fs.OOMKillCount(m.Path("memory")) +} diff --git a/libcontainer/cgroups/systemd/v2.go b/libcontainer/cgroups/systemd/v2.go index e1a6622a0..0efa9571d 100644 --- a/libcontainer/cgroups/systemd/v2.go +++ b/libcontainer/cgroups/systemd/v2.go @@ -358,3 +358,7 @@ func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) { func (m *unifiedManager) Exists() bool { return cgroups.PathExists(m.path) } + +func (m *unifiedManager) OOMKillCount() (uint64, error) { + return fs2.OOMKillCount(m.path) +} diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 8594cc742..453281e01 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -579,6 +579,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP intelRdtPath: state.IntelRdtPath, messageSockPair: messageSockPair, logFilePair: logFilePair, + manager: c.cgroupManager, config: c.newInitConfig(p), process: p, bootstrapData: data, diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index 72ca1ef72..a725000a5 100644 --- a/libcontainer/container_linux_test.go +++ b/libcontainer/container_linux_test.go @@ -55,6 +55,10 @@ func (m *mockCgroupManager) Exists() bool { return err == nil } +func (m *mockCgroupManager) OOMKillCount() (uint64, error) { + return 0, nil +} + func (m *mockCgroupManager) GetPaths() map[string]string { return m.paths } diff --git a/libcontainer/notify_linux_v2.go b/libcontainer/notify_linux_v2.go index cdab10ed6..6b48d5a39 100644 --- a/libcontainer/notify_linux_v2.go +++ b/libcontainer/notify_linux_v2.go @@ -3,32 +3,15 @@ package libcontainer import ( - "io/ioutil" "path/filepath" - "strconv" - "strings" "unsafe" "github.com/pkg/errors" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" -) - -func getValueFromCgroup(path, key string) (int, error) { - content, err := ioutil.ReadFile(path) - if err != nil { - return 0, err - } - lines := strings.Split(string(content), "\n") - for _, line := range lines { - arr := strings.Split(line, " ") - if len(arr) == 2 && arr[0] == key { - return strconv.Atoi(arr[1]) - } - } - return 0, nil -} + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" +) func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) { eventControlPath := filepath.Join(cgDir, evName) @@ -79,12 +62,12 @@ func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, err } switch int(rawEvent.Wd) { case evFd: - oom, err := getValueFromCgroup(eventControlPath, "oom_kill") + oom, err := fscommon.GetValueByKey(cgDir, evName, "oom_kill") if err != nil || oom > 0 { ch <- struct{}{} } case cgFd: - pids, err := getValueFromCgroup(cgEvPath, "populated") + pids, err := fscommon.GetValueByKey(cgDir, cgEvName, "populated") if err != nil || pids == 0 { return } diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index d914f0d0c..080f3d163 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -64,6 +64,7 @@ type setnsProcess struct { logFilePair filePair cgroupPaths map[string]string rootlessCgroups bool + manager cgroups.Manager intelRdtPath string config *initConfig fds []string @@ -87,6 +88,8 @@ func (p *setnsProcess) signal(sig os.Signal) error { func (p *setnsProcess) start() (retErr error) { defer p.messageSockPair.parent.Close() + // get the "before" value of oom kill count + oom, _ := p.manager.OOMKillCount() err := p.cmd.Start() // close the write-side of the pipes (controlled by child) p.messageSockPair.child.Close() @@ -96,6 +99,10 @@ func (p *setnsProcess) start() (retErr error) { } defer func() { if retErr != nil { + if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom { + // Someone in this cgroup was killed, this _might_ be us. + retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed") + } err := ignoreTerminateErrors(p.terminate()) if err != nil { logrus.WithError(err).Warn("unable to terminate setnsProcess") @@ -320,6 +327,24 @@ func (p *initProcess) start() (retErr error) { } defer func() { if retErr != nil { + // init might be killed by the kernel's OOM killer. + oom, err := p.manager.OOMKillCount() + if err != nil { + logrus.WithError(err).Warn("unable to get oom kill count") + } else if oom > 0 { + // Does not matter what the particular error was, + // its cause is most probably OOM, so report that. + const oomError = "container init was OOM-killed (memory limit too low?)" + + if logrus.GetLevel() >= logrus.DebugLevel { + // Only show the original error if debug is set, + // as it is not generally very useful. + retErr = newSystemErrorWithCause(retErr, oomError) + } else { + retErr = newSystemError(errors.New(oomError)) + } + } + // terminate the process to ensure we can remove cgroups if err := ignoreTerminateErrors(p.terminate()); err != nil { logrus.WithError(err).Warn("unable to terminate initProcess") diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 489090ce8..dd2b6e534 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -78,7 +78,7 @@ function setup() { runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroups_permissions [ "$status" -eq 1 ] - [[ ${lines[0]} == *"permission denied"* ]] + [[ "$output" == *"applying cgroup configuration"*"permission denied"* ]] } @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" { @@ -91,7 +91,8 @@ function setup() { runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroups_permissions [ "$status" -eq 1 ] - [[ ${lines[0]} == *"rootless needs no limits + no cgrouppath when no permission is granted for cgroups"* ]] || [[ ${lines[0]} == *"cannot set pids limit: container could not join or create cgroup"* ]] + [[ "$output" == *"rootless needs no limits + no cgrouppath when no permission is granted for cgroups"* ]] || + [[ "$output" == *"cannot set pids limit: container could not join or create cgroup"* ]] } @test "runc create (limits + cgrouppath + permission on the cgroup dir) succeeds" {