Skip to content

Commit

Permalink
libcontainer: enable to compile without kmem
Browse files Browse the repository at this point in the history
Commit fe898e7 (PR opencontainers#1350) enables kernel memory accounting
for all cgroups created by libcontainer -- even if kmem limit is
not configured.

Kernel memory accounting is known to be broken in some kernels,
specifically the ones from RHEL7 (including RHEL 7.5). Those
kernels do not support kernel memory reclaim, and are prone to
oopses. Unconditionally enabling kmem acct on such kernels lead
to bugs, such as

* opencontainers#1725
* kubernetes/kubernetes#61937
* moby/moby#29638

This commit gives a way to compile runc without kernel memory setting
support. To do so, use something like

	make BUILDTAGS="seccomp nokmem"

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Nov 1, 2018
1 parent f3ce822 commit af5a231
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 46 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
PROJECT := github.com/opencontainers/runc
BUILDTAGS := seccomp
BUILDTAGS ?= seccomp
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")

Expand Down
55 changes: 55 additions & 0 deletions libcontainer/cgroups/fs/kmem.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// +build linux,!nokmem

package fs

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"syscall" // for Errno type only

"github.com/opencontainers/runc/libcontainer/cgroups"
"golang.org/x/sys/unix"
)

const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"

func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}
11 changes: 11 additions & 0 deletions libcontainer/cgroups/fs/kmem_disabled.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// +build linux,nokmem

package fs

func EnableKernelMemoryAccounting(path string) error {
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
return nil
}
47 changes: 2 additions & 45 deletions libcontainer/cgroups/fs/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,18 @@ package fs
import (
"bufio"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"strings"
"syscall" // only for Errno

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"

"golang.org/x/sys/unix"
)

const (
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
cgroupMemoryLimit = "memory.limit_in_bytes"
)

type MemoryGroup struct {
Expand Down Expand Up @@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
return nil
}

func EnableKernelMemoryAccounting(path string) error {
// Check if kernel memory is enabled
// We have to limit the kernel memory here as it won't be accounted at all
// until a limit is set on the cgroup and limit cannot be set once the
// cgroup has children, or if there are already tasks in the cgroup.
for _, i := range []int64{1, -1} {
if err := setKernelMemory(path, i); err != nil {
return err
}
}
return nil
}

func setKernelMemory(path string, kernelMemoryLimit int64) error {
if path == "" {
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
}
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
// kernel memory is not enabled on the system so we should do nothing
return nil
}
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
// Check if the error number returned by the syscall is "EBUSY"
// The EBUSY signal is returned on attempts to write to the
// memory.kmem.limit_in_bytes file if the cgroup has children or
// once tasks have been attached to the cgroup
if pathErr, ok := err.(*os.PathError); ok {
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
if errNo == unix.EBUSY {
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
}
}
}
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
}
return nil
}

func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
// If the memory update is set to -1 we should also
// set swap to -1, it means unlimited memory.
Expand Down

0 comments on commit af5a231

Please sign in to comment.