Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handle hitting the memory limit #2812

Merged
merged 10 commits into from
Feb 26, 2021
3 changes: 3 additions & 0 deletions libcontainer/cgroups/cgroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,7 @@ type Manager interface {

// Whether the cgroup path exists or not
Exists() bool

// OOMKillCount reports OOM kill count for the cgroup.
OOMKillCount() (uint64, error)
}
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error {

sc := bufio.NewScanner(f)
for sc.Scan() {
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestCpuStats(t *testing.T) {
throttledTime = uint64(18446744073709551615)
)

cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
cpuStatContent := fmt.Sprintf("nr_periods %d\nnr_throttled %d\nthrottled_time %d\n",
nrPeriods, nrThrottled, throttledTime)
helper.writeFileContents(map[string]string{
"cpu.stat": cpuStatContent,
Expand Down
9 changes: 9 additions & 0 deletions libcontainer/cgroups/fs/fs.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"sync"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
"github.com/pkg/errors"
Expand Down Expand Up @@ -421,3 +422,11 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) {
func (m *manager) Exists() bool {
return cgroups.PathExists(m.Path("devices"))
}

func OOMKillCount(path string) (uint64, error) {
return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill")
}

func (m *manager) OOMKillCount() (uint64, error) {
return OOMKillCount(m.Path("memory"))
}
82 changes: 54 additions & 28 deletions libcontainer/cgroups/fs/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,15 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fscommon"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
)

const (
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
cgroupMemoryUsage = "memory.usage_in_bytes"
cgroupMemoryMaxUsage = "memory.max_usage_in_bytes"
)

type MemoryGroup struct {
Expand Down Expand Up @@ -57,60 +61,82 @@ func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) {
return join(path, d.pid)
}

func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
func setMemory(path string, val int64) error {
if val == 0 {
return nil
}

err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10))
if !errors.Is(err, unix.EBUSY) {
return err
}

// EBUSY means the kernel can't set new limit as it's too low
// (lower than the current usage). Return more specific error.
usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage)
if err != nil {
return err
}
max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage)
if err != nil {
return err
}

return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max)
}

func setSwap(path string, val int64) error {
if val == 0 {
return nil
}

return fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10))
}

func setMemoryAndSwap(path string, r *configs.Resources) error {
// If the memory update is set to -1 and the swap is not explicitly
// set, we should also set swap to -1, it means unlimited memory.
if cgroup.Resources.Memory == -1 && cgroup.Resources.MemorySwap == 0 {
if r.Memory == -1 && r.MemorySwap == 0 {
// Only set swap if it's enabled in kernel
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
cgroup.Resources.MemorySwap = -1
r.MemorySwap = -1
}
}

// When memory and swap memory are both set, we need to handle the cases
// for updating container.
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
memoryUsage, err := getMemoryData(path, "")
if r.Memory != 0 && r.MemorySwap != 0 {
curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit)
if err != nil {
return err
}

// When update memory limit, we should adapt the write sequence
// for memory and swap memory, so it won't fail because the new
// value and the old value don't fit kernel's validation.
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
} else {
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
return err
}
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
return err
}
}
} else {
if cgroup.Resources.Memory != 0 {
if err := fscommon.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) {
if err := setSwap(path, r.MemorySwap); err != nil {
return err
}
}
if cgroup.Resources.MemorySwap != 0 {
if err := fscommon.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
if err := setMemory(path, r.Memory); err != nil {
return err
}
return nil
}
}

if err := setMemory(path, r.Memory); err != nil {
return err
}
if err := setSwap(path, r.MemorySwap); err != nil {
return err
}

return nil
}

func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
if err := setMemoryAndSwap(path, cgroup); err != nil {
if err := setMemoryAndSwap(path, cgroup.Resources); err != nil {
return err
}

Expand Down Expand Up @@ -162,7 +188,7 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {

sc := bufio.NewScanner(statsFile)
for sc.Scan() {
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err)
}
Expand Down
9 changes: 2 additions & 7 deletions libcontainer/cgroups/fs/memory_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,6 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
helper.writeFileContents(map[string]string{
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
// Set will call getMemoryData when memory and swap memory are
// both set, fake these fields so we don't get error.
"memory.usage_in_bytes": "0",
"memory.max_usage_in_bytes": "0",
"memory.failcnt": "0",
})

helper.CgroupData.config.Resources.Memory = memoryAfter
Expand All @@ -184,14 +179,14 @@ func TestMemorySetSwapSmallerThanMemory(t *testing.T) {
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
}
if value != memoryAfter {
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
t.Fatalf("Got the wrong value (%d != %d), set memory.limit_in_bytes failed", value, memoryAfter)
}
value, err = fscommon.GetCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
if err != nil {
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
}
if value != memoryswapAfter {
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
t.Fatalf("Got the wrong value (%d != %d), set memory.memsw.limit_in_bytes failed", value, memoryswapAfter)
}
}

Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs2/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func statCpu(dirPath string, stats *cgroups.Stats) error {

sc := bufio.NewScanner(f)
for sc.Scan() {
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return err
}
Expand Down
8 changes: 8 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,11 @@ func (m *manager) GetFreezerState() (configs.FreezerState, error) {
func (m *manager) Exists() bool {
return cgroups.PathExists(m.dirPath)
}

func OOMKillCount(path string) (uint64, error) {
return fscommon.GetValueByKey(path, "memory.events", "oom_kill")
}

func (m *manager) OOMKillCount() (uint64, error) {
return OOMKillCount(m.dirPath)
}
6 changes: 1 addition & 5 deletions libcontainer/cgroups/fs2/hugetlb.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,10 @@ func statHugeTlb(dirPath string, stats *cgroups.Stats) error {
hugetlbStats.Usage = value

fileName := "hugetlb." + pagesize + ".events"
contents, err := fscommon.ReadFile(dirPath, fileName)
value, err = fscommon.GetValueByKey(dirPath, fileName, "max")
if err != nil {
return errors.Wrap(err, "failed to read stats")
}
_, value, err = fscommon.GetCgroupParamKeyValue(contents)
if err != nil {
return errors.Wrap(err, "failed to parse "+fileName)
}
hugetlbStats.Failcnt = value

stats.HugetlbStats[pagesize] = hugetlbStats
Expand Down
2 changes: 1 addition & 1 deletion libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func statMemory(dirPath string, stats *cgroups.Stats) error {

sc := bufio.NewScanner(statsFile)
for sc.Scan() {
t, v, err := fscommon.GetCgroupParamKeyValue(sc.Text())
t, v, err := fscommon.ParseKeyValue(sc.Text())
if err != nil {
return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text())
}
Expand Down
48 changes: 34 additions & 14 deletions libcontainer/cgroups/fscommon/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,42 @@ func ParseUint(s string, base, bitSize int) (uint64, error) {
return value, nil
}

// GetCgroupParamKeyValue parses a space-separated "name value" kind of cgroup
// parameter and returns its components. For example, "io_service_bytes 1234"
// will return as "io_service_bytes", 1234.
func GetCgroupParamKeyValue(t string) (string, uint64, error) {
parts := strings.Fields(t)
switch len(parts) {
case 2:
value, err := ParseUint(parts[1], 10, 64)
if err != nil {
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
}
// ParseKeyValue parses a space-separated "name value" kind of cgroup
// parameter and returns its key as a string, and its value as uint64
// (ParseUint is used to convert the value). For example,
// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234.
func ParseKeyValue(t string) (string, uint64, error) {
parts := strings.SplitN(t, " ", 3)
if len(parts) != 2 {
return "", 0, fmt.Errorf("line %q is not in key value format", t)
}

return parts[0], value, nil
default:
return "", 0, ErrNotValidFormat
value, err := ParseUint(parts[1], 10, 64)
if err != nil {
return "", 0, fmt.Errorf("unable to convert to uint64: %v", err)
}

return parts[0], value, nil
}

// GetValueByKey reads a key-value pairs from the specified cgroup file,
// and returns a value of the specified key. ParseUint is used for value
// conversion.
func GetValueByKey(path, file, key string) (uint64, error) {
content, err := ReadFile(path, file)
if err != nil {
return 0, err
}

lines := strings.Split(string(content), "\n")
for _, line := range lines {
arr := strings.Split(line, " ")
if len(arr) == 2 && arr[0] == key {
return ParseUint(arr[1], 10, 64)
}
}

return 0, nil
}

// GetCgroupParamUint reads a single uint64 value from the specified cgroup file.
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,7 @@ func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) {
func (m *legacyManager) Exists() bool {
return cgroups.PathExists(m.Path("devices"))
}

func (m *legacyManager) OOMKillCount() (uint64, error) {
return fs.OOMKillCount(m.Path("memory"))
}
4 changes: 4 additions & 0 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,3 +495,7 @@ func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) {
func (m *unifiedManager) Exists() bool {
return cgroups.PathExists(m.path)
}

func (m *unifiedManager) OOMKillCount() (uint64, error) {
return fs2.OOMKillCount(m.path)
}
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockP
intelRdtPath: state.IntelRdtPath,
messageSockPair: messageSockPair,
logFilePair: logFilePair,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/container_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ func (m *mockCgroupManager) Exists() bool {
return err == nil
}

func (m *mockCgroupManager) OOMKillCount() (uint64, error) {
return 0, nil
}

func (m *mockCgroupManager) GetPaths() map[string]string {
return m.paths
}
Expand Down
Loading