Skip to content

Commit

Permalink
cgroup2: devices: replace all existing filters when attaching
Browse files Browse the repository at this point in the history
In the normal cases (only one existing filter or no existing filters),
just make use of BPF_F_REPLACE if there is one existing filter. However
if there is more than one filter applied, we should probably remove all
other filters since the alternative is that we will never remove our old
filters.

The only two other viable ways of solving this problem would be to use
BPF pins to either pin the eBPF program using a predictable name (so we
can always only replace *our* programs) or to switch away from custom
programs and instead use eBPF maps (which are pinned) and thus we just
update the map conntents to update the ruleset. Unfortunately these both
would add a hard requirement of bpffs and would require at least a minor
rewrite of the eBPF filtering code -- which is better left for another
time.

Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
  • Loading branch information
cyphar committed May 23, 2021
1 parent 98a3c0e commit d0f2c25
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 19 deletions.
106 changes: 98 additions & 8 deletions libcontainer/cgroups/ebpf/ebpf.go
Original file line number Diff line number Diff line change
@@ -1,29 +1,95 @@
package ebpf

import (
"fmt"
"runtime"
"unsafe"

"github.com/cilium/ebpf"
"github.com/cilium/ebpf/asm"
"github.com/cilium/ebpf/link"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

func nilCloser() error {
return nil
}

func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) {
type bpfAttrQuery struct {
TargetFd uint32
AttachType uint32
QueryType uint32
AttachFlags uint32
ProgIds uint64 // __aligned_u64
ProgCnt uint32
}

// Currently you can only have 64 eBPF programs attached to a cgroup.
size := 64
retries := 0
for retries < 10 {
progIds := make([]uint32, size)
query := bpfAttrQuery{
TargetFd: uint32(dirFd),
AttachType: uint32(unix.BPF_CGROUP_DEVICE),
ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))),
ProgCnt: uint32(len(progIds)),
}

// Fetch the list of program ids.
_, _, errno := unix.Syscall(unix.SYS_BPF,
uintptr(unix.BPF_PROG_QUERY),
uintptr(unsafe.Pointer(&query)),
unsafe.Sizeof(query))
size = int(query.ProgCnt)
runtime.KeepAlive(query)
if errno != 0 {
// On ENOSPC we get the correct number of programs.
if errno == unix.ENOSPC {
retries++
continue
}
return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno)
}

// Convert the ids to program handles.
progIds = progIds[:size]
programs := make([]*ebpf.Program, len(progIds))
for idx, progId := range progIds {
program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId))
if err != nil {
return nil, fmt.Errorf("cannot fetch program from id: %w", err)
}
programs[idx] = program
}
runtime.KeepAlive(progIds)
return programs, nil
}

return nil, errors.New("could not get complete list of CGROUP_DEVICE programs")
}

// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/<foo> directory.
//
// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 .
//
// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD int) (func() error, error) {
nilCloser := func() error {
return nil
}
func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) {
// Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167).
// This limit is not inherited into the container.
memlockLimit := &unix.Rlimit{
Cur: unix.RLIM_INFINITY,
Max: unix.RLIM_INFINITY,
}
_ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit)
// Get the list of existing programs.
oldProgs, err := findAttachedCgroupDeviceFilters(dirFd)
if err != nil {
return nilCloser, err
}
spec := &ebpf.ProgramSpec{
Type: ebpf.CGroupDevice,
Instructions: insts,
Expand All @@ -33,25 +99,49 @@ func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFD
if err != nil {
return nilCloser, err
}
// If there is only one old program, we can just replace it directly.
var replaceProg *ebpf.Program
if len(oldProgs) == 1 {
replaceProg = oldProgs[0]
}
err = link.RawAttachProgram(link.RawAttachProgramOptions{
Target: dirFD,
Target: dirFd,
Program: prog,
Replace: replaceProg,
Attach: ebpf.AttachCGroupDevice,
Flags: unix.BPF_F_ALLOW_MULTI,
})
if err != nil {
return nilCloser, errors.Wrap(err, "failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI)")
return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err)
}
closer := func() error {
err = link.RawDetachProgram(link.RawDetachProgramOptions{
Target: dirFD,
Target: dirFd,
Program: prog,
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return errors.Wrap(err, "failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE)")
return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err)
}
// TODO: Should we attach the old filters back in this case? Otherwise
// we fail-open on a security feature, which is a bit scary.
return nil
}
// If there was more than one old program, give a warning (since this
// really shouldn't happen with runc-managed cgroups) and then detach all
// the old programs.
if len(oldProgs) > 1 {
logrus.Warnf("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs))
for _, oldProg := range oldProgs {
err = link.RawDetachProgram(link.RawDetachProgramOptions{
Target: dirFd,
Program: oldProg,
Attach: ebpf.AttachCGroupDevice,
})
if err != nil {
return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err)
}
}
}
return closer, nil
}
11 changes: 0 additions & 11 deletions libcontainer/cgroups/fs2/devices.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,17 +67,6 @@ func setDevices(dirPath string, r *configs.Resources) error {
return errors.Errorf("cannot get dir FD for %s", dirPath)
}
defer unix.Close(dirFD)
// XXX: This code is currently incorrect when it comes to updating an
// existing cgroup with new rules (new rulesets are just appended to
// the program list because this uses BPF_F_ALLOW_MULTI). If we didn't
// use BPF_F_ALLOW_MULTI we could actually atomically swap the
// programs.
//
// The real issue is that BPF_F_ALLOW_MULTI makes it hard to have a
// race-free blacklist because it acts as a whitelist by default, and
// having a deny-everything program cannot be overridden by other
// programs. You could temporarily insert a deny-everything program
// but that would result in spurrious failures during updates.
if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil {
if !canSkipEBPFError(r) {
return err
Expand Down

0 comments on commit d0f2c25

Please sign in to comment.