diff --git a/libbpfgo.go b/libbpfgo.go index 002c4fc2..2100ffb5 100644 --- a/libbpfgo.go +++ b/libbpfgo.go @@ -5,7 +5,9 @@ package libbpfgo #include #include +#include #include +#include #include #include @@ -30,26 +32,40 @@ static inline long PTR_ERR(const void *ptr) } #endif -int libbpf_print_fn(enum libbpf_print_level level, const char *format, +int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) { - if (level != LIBBPF_WARN) - return 0; + if (level != LIBBPF_WARN) + return 0; + + va_list check; + va_copy(check, args); + char *str = va_arg(check, char *); // BUG: https://github.com/aquasecurity/tracee/issues/1676 - va_list check; va_copy(check, args); - char *str = va_arg(check, char *); if (strstr(str, "Exclusivity flag on") != NULL) { va_end(check); return 0; } - va_end(check); - return vfprintf(stderr, format, args); + // AttachCgroupLegacy() will first try AttachCgroup() and it + // might fail. This is not an error and is the best way of + // probing for eBPF cgroup attachment link existence. + + str = va_arg(check, char *); + if (strstr(str, "cgroup") != NULL) { + str = va_arg(check, char *); + if (strstr(str, "Invalid argument") != NULL) + return 0; + } + + return vfprintf(stderr, format, args); } -void set_print_fn() { +void set_print_fn() +{ libbpf_set_print(libbpf_print_fn); } @@ -86,6 +102,35 @@ struct perf_buffer * init_perf_buf(int map_fd, int page_cnt, uintptr_t ctx) return pb; } + +int bpf_prog_attach_cgroup_legacy( + int prog_fd, // eBPF program file descriptor + int target_fd, // cgroup directory file descriptor + int type) // BPF_CGROUP_INET_{INGRESS,EGRESS}, ... 
+{ + union bpf_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + attr.attach_flags = BPF_F_ALLOW_MULTI; // or BPF_F_ALLOW_OVERRIDE + + return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach_cgroup_legacy( + int prog_fd, // eBPF program file descriptor + int target_fd, // cgroup directory file descriptor + int type) // BPF_CGROUP_INET_{INGRESS,EGRESS}, ... +{ + union bpf_attr attr; + memset(&attr, 0, sizeof(attr)); + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr)); +} */ import "C" @@ -239,23 +284,42 @@ const ( Tracing XDP Cgroup + CgroupLegacy Netns ) +type BPFLinkLegacy struct { + attachType BPFAttachType + cgroupDir string +} + type BPFLink struct { link *C.struct_bpf_link prog *BPFProg linkType LinkType eventName string + legacy *BPFLinkLegacy // if set, this is a fake BPFLink +} + +func (l *BPFLink) DestroyLegacy(linkType LinkType) error { + switch l.linkType { + case CgroupLegacy: + return l.prog.DetachCgroupLegacy( + l.legacy.cgroupDir, + l.legacy.attachType, + ) + } + return fmt.Errorf("unable to destroy legacy link") } func (l *BPFLink) Destroy() error { - ret := C.bpf_link__destroy(l.link) - if ret < 0 { + if l.legacy != nil { + return l.DestroyLegacy(l.linkType) + } + if ret := C.bpf_link__destroy(l.link); ret < 0 { return syscall.Errno(-ret) } l.link = nil - return nil } @@ -924,12 +988,11 @@ func (b *BPFMap) DeleteKey(key unsafe.Pointer) error { // // For example: // -// key := 1 -// value := []byte{'a', 'b', 'c'} -// keyPtr := unsafe.Pointer(&key) -// valuePtr := unsafe.Pointer(&value[0]) -// bpfmap.Update(keyPtr, valuePtr) -// +// key := 1 +// value := []byte{'a', 'b', 'c'} +// keyPtr := unsafe.Pointer(&key) +// valuePtr := unsafe.Pointer(&value[0]) +// bpfmap.Update(keyPtr, valuePtr) func (b *BPFMap) Update(key, value 
unsafe.Pointer) error { return b.UpdateValueFlags(key, value, MapFlagUpdateAny) } @@ -1301,20 +1364,30 @@ func (p *BPFProg) SetAttachType(attachType BPFAttachType) { C.bpf_program__set_expected_attach_type(p.prog, C.enum_bpf_attach_type(int(attachType))) } -func (p *BPFProg) AttachCgroup(cgroupV2DirPath string) (*BPFLink, error) { +// getCgroupDirFD returns a file descriptor for a given cgroup2 directory path +func getCgroupDirFD(cgroupV2DirPath string) (int, error) { const ( O_DIRECTORY int = 0200000 O_RDONLY int = 00 ) fd, err := syscall.Open(cgroupV2DirPath, O_DIRECTORY|O_RDONLY, 0) if fd < 0 { - return nil, fmt.Errorf("failed to open cgroupv2 directory path %s: %w", cgroupV2DirPath, err) + return 0, fmt.Errorf("failed to open cgroupv2 directory path %s: %w", cgroupV2DirPath, err) } - link := C.bpf_program__attach_cgroup(p.prog, C.int(fd)) + return fd, nil +} + +// AttachCgroup attaches the BPFProg to a cgroup described by given fd. +func (p *BPFProg) AttachCgroup(cgroupV2DirPath string) (*BPFLink, error) { + cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) + if err != nil { + return nil, err + } + defer syscall.Close(cgroupDirFD) + link := C.bpf_program__attach_cgroup(p.prog, C.int(cgroupDirFD)) if C.IS_ERR_OR_NULL(unsafe.Pointer(link)) { return nil, errptrError(unsafe.Pointer(link), "failed to attach cgroup on cgroupv2 %s to program %s", cgroupV2DirPath, p.name) } - // dirName will be used in bpfLink.eventName. eventName follows a format // convention and is used to better identify link types and what they are // linked with in case of errors or similar needs. Having eventName as: @@ -1331,6 +1404,68 @@ func (p *BPFProg) AttachCgroup(cgroupV2DirPath string) (*BPFLink, error) { return bpfLink, nil } +// AttachCgroupLegacy attaches the BPFProg to a cgroup described by the given +// fd. 
It first tries to use the most recent attachment method and, if that does +// not work, instead of failing, it tries the legacy way: to attach the cgroup +// eBPF program without previously creating a link. This allows attaching cgroup +// eBPF ingress/egress in older kernels. Note: the first attempt error message +// is filtered out inside libbpf_print_fn() as it is actually a feature probe +// attempt as well. +// +// Related kernel commit: https://github.com/torvalds/linux/commit/af6eea57437a +func (p *BPFProg) AttachCgroupLegacy(cgroupV2DirPath string, attachType BPFAttachType) (*BPFLink, error) { + bpfLink, err := p.AttachCgroup(cgroupV2DirPath) + if err == nil { + return bpfLink, nil + } + // Try the legacy attachment method before fully failing + cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) + if err != nil { + return nil, err + } + defer syscall.Close(cgroupDirFD) + progFD := C.bpf_program__fd(p.prog) + ret := C.bpf_prog_attach_cgroup_legacy(progFD, C.int(cgroupDirFD), C.int(attachType)) + if ret < 0 { + return nil, fmt.Errorf("failed to attach (legacy) program %s to cgroupv2 %s", p.name, cgroupV2DirPath) + } + dirName := strings.ReplaceAll(cgroupV2DirPath[1:], "/", "-") + + bpfLinkLegacy := &BPFLinkLegacy{ + attachType: attachType, + cgroupDir: cgroupV2DirPath, + } + fakeBpfLink := &BPFLink{ + link: nil, // detach/destroy made with progfd + prog: p, + eventName: fmt.Sprintf("cgroup-%s-%s", p.name, dirName), + // info below needed for detach (there isn't a real ebpf link) + linkType: CgroupLegacy, + legacy: bpfLinkLegacy, + } + return fakeBpfLink, nil +} + +// DetachCgroupLegacy detaches the BPFProg from a cgroup described by the given +// cgroupv2 directory path. This is needed because in legacy attachment there is no BPFLink, just a +// fake one (kernel did not support it, nor libbpf). 
This function should be +// called by the (*BPFLink)->Destroy() function, since BPFLink is emulated (so +// users don't need to distinguish between regular and legacy cgroup +// detachments). +func (p *BPFProg) DetachCgroupLegacy(cgroupV2DirPath string, attachType BPFAttachType) error { + cgroupDirFD, err := getCgroupDirFD(cgroupV2DirPath) + if err != nil { + return err + } + defer syscall.Close(cgroupDirFD) + progFD := C.bpf_program__fd(p.prog) + ret := C.bpf_prog_detach_cgroup_legacy(progFD, C.int(cgroupDirFD), C.int(attachType)) + if ret < 0 { + return fmt.Errorf("failed to detach (legacy) program %s from cgroupv2 %s", p.name, cgroupV2DirPath) + } + return nil +} + func (p *BPFProg) AttachXDP(deviceName string) (*BPFLink, error) { iface, err := net.InterfaceByName(deviceName) if err != nil { diff --git a/selftest/cgroup-legacy/Makefile b/selftest/cgroup-legacy/Makefile new file mode 120000 index 00000000..d981720c --- /dev/null +++ b/selftest/cgroup-legacy/Makefile @@ -0,0 +1 @@ +../common/Makefile \ No newline at end of file diff --git a/selftest/cgroup-legacy/go.mod b/selftest/cgroup-legacy/go.mod new file mode 100644 index 00000000..e3cb97fd --- /dev/null +++ b/selftest/cgroup-legacy/go.mod @@ -0,0 +1,7 @@ +module github.com/aquasecurity/libbpfgo/selftest/perfbuffers + +go 1.18 + +require github.com/aquasecurity/libbpfgo v0.2.1-libbpf-0.4.0 + +replace github.com/aquasecurity/libbpfgo => ../../ diff --git a/selftest/cgroup-legacy/go.sum b/selftest/cgroup-legacy/go.sum new file mode 100644 index 00000000..e69de29b diff --git a/selftest/cgroup-legacy/main.bpf.c b/selftest/cgroup-legacy/main.bpf.c new file mode 100644 index 00000000..b9a68989 --- /dev/null +++ b/selftest/cgroup-legacy/main.bpf.c @@ -0,0 +1,64 @@ +//+build ignore +#include + +#include +#include +#include +#include + +#ifdef asm_inline +#undef asm_inline +#define asm_inline asm +#endif + +#define ETH_P_IP 0x0800 + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 10); 
+ __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} perfbuffer SEC(".maps"); + +SEC("cgroup_skb/ingress") +int cgroup__skb_ingress(struct __sk_buff *ctx) +{ + if (ctx->protocol != bpf_htons(ETH_P_IP)) // ethernet (IP) only + return 1; + + struct bpf_sock *sk = ctx->sk; + if (!sk) { + bpf_printk("ERROR: cgroup_skb/ingress: could not get bpf_sock"); + return 1; + } + + sk = bpf_sk_fullsock(sk); + if (!sk) { + bpf_printk("ERROR: cgroup_skb/ingress: could not get full bpf_sock"); + return 1; + } + + struct iphdr ip = {0}; + if (bpf_skb_load_bytes_relative(ctx, 0, &ip, sizeof(ip), BPF_HDR_START_NET)) + return 1; + + struct icmphdr icmp = {0}; + + switch (ip.protocol) { + case IPPROTO_ICMP: + if (bpf_skb_load_bytes_relative(ctx, + sizeof(ip), + &icmp, + sizeof(struct icmphdr), + BPF_HDR_START_NET)) + return 1; + + u32 bleh = 20220823; + bpf_perf_event_output(ctx, &perfbuffer, BPF_F_CURRENT_CPU, &bleh, sizeof(bleh)); + break; + } + + return 1; +} + +char LICENSE[] SEC("license") = "GPL"; \ No newline at end of file diff --git a/selftest/cgroup-legacy/main.go b/selftest/cgroup-legacy/main.go new file mode 100644 index 00000000..d18d5e60 --- /dev/null +++ b/selftest/cgroup-legacy/main.go @@ -0,0 +1,127 @@ +package main + +import "C" + +import ( + "context" + "encoding/binary" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "os/signal" + "regexp" + "runtime" + "syscall" + "time" + + bpf "github.com/aquasecurity/libbpfgo" +) + +var reCgroup2Mount = regexp.MustCompile(`(?m)^cgroup2\s(/\S+)\scgroup2\s`) + +func main() { + bpfModule, err := bpf.NewModuleFromFile("main.bpf.o") + if err != nil { + Error(err) + } + defer bpfModule.Close() + + err = bpfModule.BPFLoadObject() + if err != nil { + Error(err) + } + + prog, err := bpfModule.GetProgram("cgroup__skb_ingress") + if err != nil { + Error(err) + } + + cgroupRootDir := getCgroupV2RootDir() + + link, err := prog.AttachCgroupLegacy(cgroupRootDir, bpf.BPFAttachTypeCgroupInetIngress) + if err != nil { + 
Error(err) + } + + eventsChannel := make(chan []byte, 100) + lostChannel := make(chan uint64, 10) + + // initialize an eBPF perf buffer to receive events + bpfPerfBuffer, err := bpfModule.InitPerfBuf( + "perfbuffer", eventsChannel, lostChannel, 1, + ) + if err != nil { + Error(err) + } + + // signal handling + ctx, stop := signal.NotifyContext( + context.Background(), syscall.SIGINT, syscall.SIGTERM, + ) + defer stop() + + // start eBPF perf buffer event polling + bpfPerfBuffer.Start() + + go func() { + _, err := exec.Command("ping", "127.0.0.1", "-c 5", "-w 10").Output() + if err != nil { + Error(err) + } + time.Sleep(time.Second) + stop() + }() + + testPassed := false + numberOfEventsReceived := 0 +LOOP: + for { + select { + case raw := <-eventsChannel: + value := int(binary.LittleEndian.Uint32(raw)) + if value == 20220823 { + fmt.Println("Received correct event.") + numberOfEventsReceived++ + if numberOfEventsReceived >= 5 { + testPassed = true + break LOOP + } + } + case <-ctx.Done(): + break LOOP + } + } + + err = link.Destroy() + if err != nil { + Error(err) + } + + if !testPassed { + Error(fmt.Errorf("unable to get all packets")) + } + + os.Exit(0) +} + +func getCgroupV2RootDir() string { + data, err := ioutil.ReadFile("/proc/mounts") + if err != nil { + fmt.Fprintf(os.Stderr, "read /proc/mounts failed: %+v\n", err) + os.Exit(-1) + } + items := reCgroup2Mount.FindStringSubmatch(string(data)) + if len(items) < 2 { + fmt.Fprintln(os.Stderr, "cgroupv2 is not mounted") + os.Exit(-1) + } + return items[1] +} + +func Error(err error) { + _, fn, line, _ := runtime.Caller(1) + log.Printf("ERROR: %s:%d %v\n", fn, line, err) + os.Exit(1) +} diff --git a/selftest/cgroup-legacy/run.sh b/selftest/cgroup-legacy/run.sh new file mode 120000 index 00000000..3355de25 --- /dev/null +++ b/selftest/cgroup-legacy/run.sh @@ -0,0 +1 @@ +../common/run-warn-bt-5.4.sh \ No newline at end of file diff --git a/selftest/common/common.sh b/selftest/common/common.sh index 
d4ac5910..0064854c 100644 --- a/selftest/common/common.sh +++ b/selftest/common/common.sh @@ -2,6 +2,7 @@ GREEN='\033[0;01;32m' RED='\033[0;01;31m' +YELLOW='\033[0;01;33m' NC='\033[0m' ## error handling @@ -10,22 +11,27 @@ error() { echo -e "${RED}[!] ERROR: $1${NC}"; } +warn() { + echo -e "${YELLOW}[!] WARNING: $1${NC}"; +} + okay() { echo -e "${GREEN}[*] SUCCESS: $1${NC}"; } -errexit() { error "$1"; exit 1; } -errnull() { exit 2; } # reserved for 'make' (always ret 2 on errors) -errtimeout() { error "$1"; exit 3; } -errfailure() { error "$1"; exit 4; } -okexit() { okay "$1"; exit 0; } -okcontinue() { okay "$1"; } +errexit() { error "$1"; exit 1; } +warnexit() { warn "$1"; exit 0; } +warncontinue() { warn "$1"; } +errnull() { exit 2; } # reserved for 'make' (always ret 2 on errors) +errtimeout() { error "$1"; exit 3; } +errfailure() { error "$1"; exit 4; } +okexit() { okay "$1"; exit 0; } +okcontinue() { okay "$1"; } ## kernel version checks kern_version() { - - _oper=$1; _version=$2; + _oper=$1; _version=$2; _notfatal=$3; _given=$(($(echo $_version | sed 's:\.::g'))) _current=$(($(uname -r | cut -d'.' 
-f1,2 | sed 's:\.::g'))) @@ -52,9 +58,12 @@ kern_version() { esac if [[ $_opergood -ne 1 ]]; then - errexit "kernel $_current not $_oper than $_given" + if [[ $_notfatal -eq 1 ]]; then + warncontinue "kernel $_current not $_oper than $_given" + else + errexit "kernel $_current not $_oper than $_given" + fi fi - } ## checks @@ -65,10 +74,15 @@ check_build() { } check_ppid() { - _ppid=$(ps -o ppid= $$); _pppid=$(ps -o ppid= $_ppid); - _comm=$(ps -o comm= $_ppid); _pcomm=$(ps -o comm= $_pppid); + _ppid=$(ps -o ppid= $$); + _pppid=$(ps -o ppid= $_ppid); + _ppppid=$(ps -o ppid= $_pppid); + + _comm=$(ps -o comm= $_ppid); + _pcomm=$(ps -o comm= $_pppid); + _ppcomm=$(ps -o comm= $_ppppid); - if [ "$_comm" != "make" ] && [ "$_pcomm" != "make" ]; then + if [[ $_comm != make && $_pcomm != make && $_ppcomm != make ]]; then errexit "do a 'make run' instead"; fi } diff --git a/selftest/common/run-5.8.sh b/selftest/common/run-5.8.sh index 106723ad..8480b8f1 100755 --- a/selftest/common/run-5.8.sh +++ b/selftest/common/run-5.8.sh @@ -2,8 +2,8 @@ # SETTINGS -TEST=$(dirname $0)/$1 # execute -TIMEOUT=5 # seconds +TEST=$(dirname $0)/$1 +TIMEOUT=5 # COMMON diff --git a/selftest/common/run-warn-bt-5.4.sh b/selftest/common/run-warn-bt-5.4.sh new file mode 100755 index 00000000..7dc63a56 --- /dev/null +++ b/selftest/common/run-warn-bt-5.4.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# SETTINGS + +TEST=$(dirname $0)/$1 +TIMEOUT=5 + +# COMMON + +COMMON="$(dirname $0)/../common/common.sh" +[[ -f $COMMON ]] && { . $COMMON; } || { error "no common"; exit 1; } + +# MAIN + +kern_version le 5.4 1 # 1 == not fatal (skip if greater or equal to 5.4) + +check_build +check_ppid +test_exec +test_finish + +exit 0