diff --git a/KubeArmor/config/config.go b/KubeArmor/config/config.go
index 4a71d8700..1ea1287df 100644
--- a/KubeArmor/config/config.go
+++ b/KubeArmor/config/config.go
@@ -27,6 +27,8 @@ type KubearmorConfig struct {
 	LogPath           string // Log file to use
 	SELinuxProfileDir string // Directory to store SELinux profiles
 	CRISocket         string // Container runtime to use
+	NRISocket         string // NRI socket to use
+	NRIIndex          string // NRI plugin index to use
 	Visibility        string // Container visibility to use
 	HostVisibility    string // Host visibility to use
 
@@ -82,6 +84,8 @@ const (
 	ConfigLogPath           string = "logPath"
 	ConfigSELinuxProfileDir string = "seLinuxProfileDir"
 	ConfigCRISocket         string = "criSocket"
+	ConfigNRISocket         string = "nriSocket"
+	ConfigNRIIndex          string = "nriIndex"
 	ConfigVisibility        string = "visibility"
 	ConfigHostVisibility    string = "hostVisibility"
 	ConfigKubearmorPolicy   string = "enableKubeArmorPolicy"
@@ -122,6 +126,8 @@ func readCmdLineParams() {
 	logStr := flag.String(ConfigLogPath, "none", "log file path, {path|stdout|none}")
 	seLinuxProfileDirStr := flag.String(ConfigSELinuxProfileDir, "/tmp/kubearmor.selinux", "SELinux profile directory")
 	criSocket := flag.String(ConfigCRISocket, "", "path to CRI socket (format: unix:///path/to/file.sock)")
+	nriSocket := flag.String(ConfigNRISocket, "", "path to NRI socket (format: /path/to/file.sock)")
+	nriIndex := flag.String(ConfigNRIIndex, "99", "NRI plugin index")
 	visStr := flag.String(ConfigVisibility, "process,file,network,capabilities", "Container Visibility to use [process,file,network,capabilities,none]")
 	hostVisStr := flag.String(ConfigHostVisibility, "default", "Host Visibility to use [process,file,network,capabilities,none] (default \"none\" for k8s, \"process,file,network,capabilities\" for VM)")
 
@@ -185,6 +191,8 @@ func readCmdLineParams() {
 	viper.SetDefault(ConfigLogPath, *logStr)
 	viper.SetDefault(ConfigSELinuxProfileDir, *seLinuxProfileDirStr)
 	viper.SetDefault(ConfigCRISocket, *criSocket)
+	viper.SetDefault(ConfigNRISocket, *nriSocket)
+	viper.SetDefault(ConfigNRIIndex, *nriIndex)
 	viper.SetDefault(ConfigVisibility, *visStr)
 	viper.SetDefault(ConfigHostVisibility, *hostVisStr)
 
@@ -275,6 +283,13 @@ func LoadConfig() error {
 		return fmt.Errorf("CRI socket must start with 'unix://' (%s is invalid)", GlobalCfg.CRISocket)
 	}
 
+	GlobalCfg.NRISocket = os.Getenv("NRI_SOCKET")
+	if GlobalCfg.NRISocket == "" {
+		GlobalCfg.NRISocket = viper.GetString(ConfigNRISocket)
+	}
+
+	GlobalCfg.NRIIndex = viper.GetString(ConfigNRIIndex)
+
 	GlobalCfg.Visibility = viper.GetString(ConfigVisibility)
 	GlobalCfg.HostVisibility = viper.GetString(ConfigHostVisibility)
 
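Note (not part of the patch): a minimal standalone sketch of the precedence LoadConfig applies to the new settings above -- the NRI_SOCKET environment variable overrides the -nriSocket flag, and -nriIndex falls back to its default of "99". The flag names and defaults come from config.go; the package name and output are illustrative only.

package main

import (
	"flag"
	"fmt"
	"os"
)

func main() {
	// mirrors readCmdLineParams: same flag names and defaults as in config.go
	nriSocket := flag.String("nriSocket", "", "path to NRI socket (format: /path/to/file.sock)")
	nriIndex := flag.String("nriIndex", "99", "NRI plugin index")
	flag.Parse()

	// mirrors LoadConfig: the NRI_SOCKET environment variable wins over the flag
	socket := os.Getenv("NRI_SOCKET")
	if socket == "" {
		socket = *nriSocket
	}

	fmt.Printf("NRI socket: %q, plugin index: %q\n", socket, *nriIndex)
}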
diff --git a/KubeArmor/core/kubeArmor.go b/KubeArmor/core/kubeArmor.go
index 051842b81..f626ca5a1 100644
--- a/KubeArmor/core/kubeArmor.go
+++ b/KubeArmor/core/kubeArmor.go
@@ -589,7 +589,10 @@ func KubeArmor() {
 		dm.SetContainerNSVisibility()
 
 		// monitor containers
-		if strings.Contains(cfg.GlobalCfg.CRISocket, "docker") {
+		if cfg.GlobalCfg.NRISocket != "" {
+			// monitor NRI events
+			go dm.MonitorNRIEvents()
+		} else if strings.Contains(cfg.GlobalCfg.CRISocket, "docker") {
 			// update already deployed containers
 			dm.GetAlreadyDeployedDockerContainers()
 			// monitor docker events
@@ -611,8 +614,10 @@
 	}
 
 	if dm.K8sEnabled && cfg.GlobalCfg.Policy {
-		// check if the CRI socket set while executing kubearmor exists
-		if cfg.GlobalCfg.CRISocket != "" {
+		if cfg.GlobalCfg.NRISocket != "" {
+			// monitor NRI events
+			go dm.MonitorNRIEvents()
+		} else if cfg.GlobalCfg.CRISocket != "" { // check if the CRI socket set while executing kubearmor exists
 			trimmedSocket := strings.TrimPrefix(cfg.GlobalCfg.CRISocket, "unix://")
 			if _, err := os.Stat(trimmedSocket); err != nil {
 				dm.Logger.Warnf("Error while looking for CRI socket file: %s", err.Error())
diff --git a/KubeArmor/core/nriHandler.go b/KubeArmor/core/nriHandler.go
new file mode 100644
index 000000000..7e7d536ec
--- /dev/null
+++ b/KubeArmor/core/nriHandler.go
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright 2021 Authors of KubeArmor
+
+package core
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/containerd/nri/pkg/api"
+	"github.com/containerd/nri/pkg/stub"
+	kl "github.com/kubearmor/KubeArmor/KubeArmor/common"
+	cfg "github.com/kubearmor/KubeArmor/KubeArmor/config"
+	kg "github.com/kubearmor/KubeArmor/KubeArmor/log"
+	tp "github.com/kubearmor/KubeArmor/KubeArmor/types"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// NRI Handler
+var NRI *NRIHandler
+
+type namespaceKey struct {
+	PidNS uint32
+	MntNS uint32
+}
+
+// namespaceKeyFromContainer creates a namespaceKey from a container.
+func namespaceKeyFromContainer(container tp.Container) namespaceKey {
+	return namespaceKey{
+		PidNS: container.PidNS,
+		MntNS: container.MntNS,
+	}
+}
+
+// NRIHandler connects to an NRI socket and informs on container
+// creation/deletion events.
+type NRIHandler struct {
+	// NRI plugin stub
+	stub stub.Stub
+
+	// active containers
+	containers map[string]tp.Container
+
+	containersByNamespaces map[namespaceKey]string
+
+	handleDeletedContainer func(tp.Container)
+	handleNewContainer     func(tp.Container)
+}
+
+// NewNRIHandler creates a new NRIHandler with the given event callbacks.
+func NewNRIHandler(
+	handleDeletedContainer func(tp.Container),
+	handleNewContainer func(tp.Container),
+) *NRIHandler {
+	nri := &NRIHandler{}
+
+	opts := []stub.Option{
+		stub.WithSocketPath(cfg.GlobalCfg.NRISocket),
+		stub.WithPluginIdx(cfg.GlobalCfg.NRIIndex),
+	}
+
+	stub, err := stub.New(nri, opts...)
+	if err != nil {
+		kg.Errf("Failed to create NRI stub: %s", err.Error())
+		return nil
+	}
+
+	nri.containers = map[string]tp.Container{}
+	nri.containersByNamespaces = map[namespaceKey]string{}
+	nri.stub = stub
+	nri.handleDeletedContainer = handleDeletedContainer
+	nri.handleNewContainer = handleNewContainer
+
+	return nri
+}
+
+// Start initiates a configured NRI connection.
+func (nh *NRIHandler) Start() {
+	go func() {
+		err := nh.stub.Run(context.Background())
+		if err != nil {
+			kg.Errf("Failed to connect to NRI: %s", err.Error())
+		}
+	}()
+}
+
+// Close closes the NRI connection.
+func (nh *NRIHandler) Close() {
+	nh.stub.Stop()
+}
+
+// Synchronize is an NRI callback which is called at the beginning of an NRI
+// socket connection to inform on all existing containers.
+func (nh *NRIHandler) Synchronize(
+	_ context.Context,
+	_ []*api.PodSandbox,
+	nriContainers []*api.Container,
+) ([]*api.ContainerUpdate, error) {
+	for _, nriContainer := range nriContainers {
+		container := nriToKubeArmorContainer(nriContainer)
+		container = nh.mergeContainer(container, false)
+
+		// Overlapping namespace IDs between containers should be impossible
+		// here
+		namespaceKey := namespaceKeyFromContainer(container)
+		nh.containersByNamespaces[namespaceKey] = container.ContainerID
+
+		nh.handleNewContainer(container)
+	}
+
+	return nil, nil
+}
+
+// StartContainer is an NRI callback which is called after a container has
+// started.
+//
+// Unfortunately we can't use the CreateContainer or PostCreateContainer NRI
+// callbacks because they are called without a PID value, which is required in
+// order to get the PID and mount namespaces of the container. This means that
+// there is a short period of time between a container starting and us enforcing
+// it.
+//
+// If StartContainer detects a container namespace ID overlap with a previous
+// container (since Linux can reuse namespace IDs), it will override the old
+// policy correctly, but any actions runc took to set up this container and
+// start it will be logged/enforced as if they were the old container's actions.
+// This should be exceedingly rare, but there's no way using just NRI that we
+// can entirely avoid this scenario.
+func (nh *NRIHandler) StartContainer(
+	_ context.Context,
+	_ *api.PodSandbox,
+	nriContainer *api.Container,
+) error {
+	container := nriToKubeArmorContainer(nriContainer)
+	container = nh.mergeContainer(container, false)
+
+	namespaceKey := namespaceKeyFromContainer(container)
+
+	// It's technically possible for a container to crash and a new one to be
+	// started, all before we receive the StopContainer event. And because Linux
+	// can reuse namespace IDs, it's possible for the enforcement configuration
+	// to get confused and messed up, so if namespace IDs ever overlap, we
+	// assume the previous container using those namespaces has already exited.
+	if oldContainerID, ok := nh.containersByNamespaces[namespaceKey]; ok {
+		nh.handleDeletedContainer(nh.containers[oldContainerID])
+
+		delete(nh.containers, oldContainerID)
+	}
+
+	nh.containersByNamespaces[namespaceKey] = container.ContainerID
+
+	nh.handleNewContainer(container)
+
+	return nil
+}
+
+// StopContainer is an NRI callback which is called before a container receives
+// the signal to stop.
+//
+// StopContainer is called synchronously before a termination signal is sent to
+// a container, so we can be sure that we stop enforcing before the container
+// shuts down, at least in most cases. This means that if a new container reuses
+// Linux namespace IDs from a previous container, so long as that previous
+// container didn't crash unexpectedly, we can be sure that we won't
+// accidentally enforce the new container with the old container's policy.
+//
+// The tradeoff here is that once a container receives its termination signal,
+// KubeArmor is no longer enforcing anything on it while it shuts down.
+func (nh *NRIHandler) StopContainer(
+	_ context.Context,
+	_ *api.PodSandbox,
+	nriContainer *api.Container,
+) ([]*api.ContainerUpdate, error) {
+	container := nriToKubeArmorContainer(nriContainer)
+	container = nh.mergeContainer(container, true)
+
+	// Only handle the container deleted event if it wasn't already 'deleted' by
+	// the StartContainer event (due to a Linux namespace ID collision).
+	if _, ok := nh.containersByNamespaces[namespaceKeyFromContainer(container)]; ok {
+		delete(nh.containers, container.ContainerID)
+
+		nh.handleDeletedContainer(container)
+	}
+
+	return nil, nil
+}
+
+// RemoveContainer is an NRI callback which is called after a container has
+// exited.
+//
+// In case StopContainer isn't called, we hook into RemoveContainer to ensure
+// that we stop enforcing a container after it has exited. For example, the NRI
+// API doesn't guarantee that StopContainer will be called if a container
+// crashed unexpectedly.
+func (nh *NRIHandler) RemoveContainer(
+	_ context.Context,
+	_ *api.PodSandbox,
+	nriContainer *api.Container,
+) ([]*api.ContainerUpdate, error) {
+	container := nriToKubeArmorContainer(nriContainer)
+	container = nh.mergeContainer(container, true)
+
+	// Only handle the container deleted event if it wasn't already 'deleted' by
+	// the StartContainer event (due to a Linux namespace ID collision) or
+	// StopContainer event.
+	if _, ok := nh.containersByNamespaces[namespaceKeyFromContainer(container)]; ok {
+		delete(nh.containers, container.ContainerID)
+
+		nh.handleDeletedContainer(container)
+	}
+
+	return nil, nil
+}
+
+// mergeContainer updates the container with the container's previously-stored
+// namespace IDs, if any, also storing namespaceIDs for future reference.
+func (nh *NRIHandler) mergeContainer(container tp.Container, removing bool) tp.Container {
+	if existing, ok := nh.containers[container.ContainerID]; ok {
+		if existing.PidNS != 0 {
+			container.PidNS = existing.PidNS
+		}
+
+		if existing.MntNS != 0 {
+			container.MntNS = existing.MntNS
+		}
+
+		nh.containers[container.ContainerID] = container
+	} else if !removing {
+		nh.containers[container.ContainerID] = container
+	}
+
+	return container
+}
+
+// nriToKubeArmorContainer converts an NRI container to a KubeArmor container.
+func nriToKubeArmorContainer(nriContainer *api.Container) tp.Container {
+	container := tp.Container{}
+
+	container.ContainerID = nriContainer.Id
+	container.ContainerName = nriContainer.Name
+
+	container.NamespaceName = "Unknown"
+	container.EndPointName = "Unknown"
+
+	if _, ok := nriContainer.Labels["io.kubernetes.pod.namespace"]; ok {
+		container.NamespaceName = nriContainer.Labels["io.kubernetes.pod.namespace"] // Pod namespace
+
+		if _, ok := nriContainer.Labels["io.kubernetes.pod.name"]; ok {
+			container.EndPointName = nriContainer.Labels["io.kubernetes.pod.name"] // Pod name
+		}
+	}
+
+	var podName string
+	var podNamespace string
+
+	if name, ok := nriContainer.Labels["io.kubernetes.pod.name"]; ok {
+		podName = name
+	}
+	if namespace, ok := nriContainer.Labels["io.kubernetes.pod.namespace"]; ok {
+		podNamespace = namespace
+	}
+
+	pod, err := K8s.K8sClient.CoreV1().Pods(podNamespace).Get(context.TODO(), podName, metav1.GetOptions{})
+	if err != nil {
+		kg.Warnf("failed to fetch Pod: %s", err.Error())
+	}
+
+	if appArmorProfile, ok := pod.Annotations["container.apparmor.security.beta.kubernetes.io/"+nriContainer.Name]; ok {
+		profile := strings.Split(appArmorProfile, "/")
+		if len(profile) > 1 {
+			container.AppArmorProfile = profile[1]
+		}
+	}
+
+	// Read PID and mount namespaces from container root PID
+	if nriContainer.Pid != 0 {
+		pid := strconv.Itoa(int(nriContainer.Pid))
+
+		if data, err := os.Readlink(filepath.Join(cfg.GlobalCfg.ProcFsMount, pid, "/ns/pid")); err == nil {
+			if _, err := fmt.Sscanf(data, "pid:[%d]", &container.PidNS); err != nil {
+				kg.Warnf("Unable to get PidNS (%s, %d, %s)", nriContainer.Id, nriContainer.Pid, err.Error())
+			}
+		}
+
+		if data, err := os.Readlink(filepath.Join(cfg.GlobalCfg.ProcFsMount, pid, "/ns/mnt")); err == nil {
+			if _, err := fmt.Sscanf(data, "mnt:[%d]", &container.MntNS); err != nil {
+				kg.Warnf("Unable to get MntNS (%s, %d, %s)", nriContainer.Id, nriContainer.Pid, err.Error())
+			}
+		}
+	}
+
+	return container
+}
+
+// MonitorNRIEvents monitors NRI events.
+func (dm *KubeArmorDaemon) MonitorNRIEvents() {
+	dm.WgDaemon.Add(1)
+	defer dm.WgDaemon.Done()
+
+	handleDeletedContainer := func(container tp.Container) {
+		dm.ContainersLock.Lock()
+		_, ok := dm.Containers[container.ContainerID]
+		if !ok {
+			dm.ContainersLock.Unlock()
+			return
+		}
+		if !dm.K8sEnabled {
+			dm.EndPointsLock.Lock()
+			dm.MatchandRemoveContainerFromEndpoint(container.ContainerID)
+			dm.EndPointsLock.Unlock()
+		}
+		delete(dm.Containers, container.ContainerID)
+		dm.ContainersLock.Unlock()
+
+		// TODO: Can't update AppArmor profiles since we can't get them from NRI
+
+		if dm.SystemMonitor != nil && cfg.GlobalCfg.Policy {
+			// update NsMap
+			dm.SystemMonitor.DeleteContainerIDFromNsMap(container.ContainerID, container.NamespaceName, container.PidNS, container.MntNS)
+			dm.RuntimeEnforcer.UnregisterContainer(container.ContainerID)
+		}
+
+		dm.Logger.Printf("Detected a container (removed/%.12s/pidns=%d/mntns=%d)", container.ContainerID, container.PidNS, container.MntNS)
+	}
+
+	handleNewContainer := func(container tp.Container) {
+		endpoint := tp.EndPoint{}
+
+		dm.ContainersLock.Lock()
+
+		if len(dm.OwnerInfo) > 0 {
+			if podOwnerInfo, ok := dm.OwnerInfo[container.EndPointName]; ok {
+				container.Owner = podOwnerInfo
+			}
+		}
+
+		if _, ok := dm.Containers[container.ContainerID]; !ok {
+			dm.Containers[container.ContainerID] = container
+			dm.ContainersLock.Unlock()
+		} else if dm.Containers[container.ContainerID].PidNS == 0 && dm.Containers[container.ContainerID].MntNS == 0 {
+			// this entry was updated by Kubernetes before NRI detected the container;
+			// thus, we use the info given by Kubernetes instead of the info given by NRI
+
+			container.NamespaceName = dm.Containers[container.ContainerID].NamespaceName
+			container.EndPointName = dm.Containers[container.ContainerID].EndPointName
+			container.Labels = dm.Containers[container.ContainerID].Labels
+
+			container.ContainerName = dm.Containers[container.ContainerID].ContainerName
+			container.ContainerImage = dm.Containers[container.ContainerID].ContainerImage
+
+			container.PolicyEnabled = dm.Containers[container.ContainerID].PolicyEnabled
+
+			container.ProcessVisibilityEnabled = dm.Containers[container.ContainerID].ProcessVisibilityEnabled
+			container.FileVisibilityEnabled = dm.Containers[container.ContainerID].FileVisibilityEnabled
+			container.NetworkVisibilityEnabled = dm.Containers[container.ContainerID].NetworkVisibilityEnabled
+			container.CapabilitiesVisibilityEnabled = dm.Containers[container.ContainerID].CapabilitiesVisibilityEnabled
+
+			dm.Containers[container.ContainerID] = container
+			dm.ContainersLock.Unlock()
+
+			dm.EndPointsLock.Lock()
+			for idx, endPoint := range dm.EndPoints {
+				if endPoint.NamespaceName == container.NamespaceName && endPoint.EndPointName == container.EndPointName && kl.ContainsElement(endPoint.Containers, container.ContainerID) {
+					// update containers
+					if !kl.ContainsElement(endPoint.Containers, container.ContainerID) { // does not make sense but need to verify
+						dm.EndPoints[idx].Containers = append(dm.EndPoints[idx].Containers, container.ContainerID)
+					}
+
+					endpoint = dm.EndPoints[idx]
+
+					break
+				}
+			}
+			dm.EndPointsLock.Unlock()
+		} else {
+			dm.ContainersLock.Unlock()
+			return
+		}
+
+		if dm.SystemMonitor != nil && cfg.GlobalCfg.Policy {
+			// update NsMap
+			dm.SystemMonitor.AddContainerIDToNsMap(container.ContainerID, container.NamespaceName, container.PidNS, container.MntNS)
+			dm.RuntimeEnforcer.RegisterContainer(container.ContainerID, container.PidNS, container.MntNS)
+
+			if len(endpoint.SecurityPolicies) > 0 { // struct can be empty or no policies registered for the endpoint yet
+				dm.Logger.UpdateSecurityPolicies("ADDED", endpoint)
+				if dm.RuntimeEnforcer != nil && endpoint.PolicyEnabled == tp.KubeArmorPolicyEnabled {
+					// enforce security policies
+					dm.RuntimeEnforcer.UpdateSecurityPolicies(endpoint)
+				}
+			}
+		}
+
+		if !dm.K8sEnabled {
+			dm.ContainersLock.Lock()
+			dm.EndPointsLock.Lock()
+			dm.MatchandUpdateContainerSecurityPolicies(container.ContainerID)
+			dm.EndPointsLock.Unlock()
+			dm.ContainersLock.Unlock()
+		}
+
+		dm.Logger.Printf("Detected a container (added/%.12s/pidns=%d/mntns=%d)", container.ContainerID, container.PidNS, container.MntNS)
+	}
+
+	NRI = NewNRIHandler(handleDeletedContainer, handleNewContainer)
+
+	// check if the NRI handler was created
+	if NRI == nil {
+		return
+	}
+
+	NRI.Start()
+
+	dm.Logger.Print("Started to monitor NRI events")
+}
diff --git a/KubeArmor/go.mod b/KubeArmor/go.mod
index 583eaa810..b12d548cf 100644
--- a/KubeArmor/go.mod
+++ b/KubeArmor/go.mod
@@ -28,6 +28,7 @@ require (
 	github.com/cilium/cilium v1.14.12
 	github.com/cilium/ebpf v0.12.3
 	github.com/containerd/containerd v1.7.13
+	github.com/containerd/nri v0.6.0
 	github.com/containerd/typeurl/v2 v2.1.1
 	github.com/docker/docker v25.0.5+incompatible
 	github.com/golang/protobuf v1.5.4
diff --git a/KubeArmor/go.sum b/KubeArmor/go.sum
index f4d440c00..e0e03f15b 100644
--- a/KubeArmor/go.sum
+++ b/KubeArmor/go.sum
@@ -29,6 +29,8 @@ github.com/containerd/containerd v1.7.13 h1:wPYKIeGMN8vaggSKuV1X0wZulpMz4CrgEsZd
 github.com/containerd/containerd v1.7.13/go.mod h1:zT3up6yTRfEUa6+GsITYIJNgSVL9NQ4x4h1RPzk0Wu4=
 github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
 github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
+github.com/containerd/nri v0.6.0 h1:hdztxwL0gCS1CrCa9bvD1SoJiFN4jBuRQhplCvCPMj8=
+github.com/containerd/nri v0.6.0/go.mod h1:F7OZfO4QTPqw5r87aq+syZJwiVvRYLIlHZiZDBV1W3A=
 github.com/containerd/ttrpc v1.2.3 h1:4jlhbXIGvijRtNC8F/5CpuJZ7yKOBFGFOOXg1bkISz0=
 github.com/containerd/ttrpc v1.2.3/go.mod h1:ieWsXucbb8Mj9PH0rXCw1i8IunRbbAiDkpXkbfflWBM=
 github.com/containerd/typeurl/v2 v2.1.1 h1:3Q4Pt7i8nYwy2KmQWIw2+1hTvwTE/6w9FqcttATPO/4=
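Note (not part of the patch): for reviewers unfamiliar with NRI, the handler above registers with the container runtime through the containerd/nri stub the same way any NRI plugin does. Below is a rough standalone sketch of a toy plugin using only the stub calls that already appear in nriHandler.go (stub.New, stub.WithSocketPath, stub.WithPluginIdx, Run); the socket path /var/run/nri/nri.sock and the index "10" are assumptions for illustration (KubeArmor reads both from its config, with index defaulting to "99"), and the StartContainer hook signature matches the one used in the patch.

package main

import (
	"context"
	"fmt"

	"github.com/containerd/nri/pkg/api"
	"github.com/containerd/nri/pkg/stub"
)

type plugin struct{}

// StartContainer uses the same hook signature as NRIHandler.StartContainer above.
func (p *plugin) StartContainer(_ context.Context, _ *api.PodSandbox, ctr *api.Container) error {
	fmt.Printf("container started: %s (pid %d)\n", ctr.Name, ctr.Pid)
	return nil
}

func main() {
	// Assumed values for illustration: the NRI socket path exposed by the
	// runtime and an arbitrary plugin index.
	s, err := stub.New(&plugin{},
		stub.WithSocketPath("/var/run/nri/nri.sock"),
		stub.WithPluginIdx("10"),
	)
	if err != nil {
		panic(err)
	}

	// Run blocks, serving NRI events until the connection is closed.
	if err := s.Run(context.Background()); err != nil {
		panic(err)
	}
}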