Skip to content

Commit

Permalink
libcontainer: ability to compile without kmem
Browse files Browse the repository at this point in the history
Commit ce43d2d (PR #1350) enables kernel memory accounting
for all cgroups created by libcontainer -- even if kmem limit is
not configured.

Kernel memory accounting is known to be broken in some kernels,
specifically the ones from RHEL7 (including RHEL 7.5). Those
kernels do not support kernel memory reclaim, and are prone to
oopses. Unconditionally enabling kmem acct on such kernels lead
to bugs, such as

* opencontainers/runc#1725
* kubernetes/kubernetes#61937
* moby/moby#29638

This commit gives a way to compile runc without kernel memory setting
support. To do so, use something like

	make BUILDTAGS="seccomp nokmem"

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Nov 1, 2018
1 parent 4d76b06 commit 229173c
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 45 deletions.
55 changes: 55 additions & 0 deletions libcontainer/cgroups/fs/kmem.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// +build linux,!nokmem

package fs

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"syscall" // for Errno type only

"github.com/opencontainers/runc/libcontainer/cgroups"
"golang.org/x/sys/unix"
)

const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"

func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}
11 changes: 11 additions & 0 deletions libcontainer/cgroups/fs/kmem_disabled.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// +build linux,nokmem

package fs

func EnableKernelMemoryAccounting(path string) error {
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
return nil
}
47 changes: 2 additions & 45 deletions libcontainer/cgroups/fs/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,18 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall" // only for Errno

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"

"golang.org/x/sys/unix"
)

const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)

type MemoryGroup struct {
Expand Down Expand Up @@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}

func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}

func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
Expand Down

0 comments on commit 229173c

Please sign in to comment.