diff --git a/go.mod b/go.mod index c3f8b87a4..2a8f81a77 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,7 @@ require ( github.com/onsi/gomega v1.30.0 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.0-rc5 - github.com/opencontainers/runc v1.1.10 + github.com/opencontainers/runc v1.1.11 github.com/opencontainers/runtime-spec v1.1.0 github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc github.com/opencontainers/selinux v1.11.0 diff --git a/go.sum b/go.sum index 1085d5e04..f1e5cbcb4 100644 --- a/go.sum +++ b/go.sum @@ -310,8 +310,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0-rc5 h1:Ygwkfw9bpDvs+c9E34SdgGOj41dX/cbdlwvlWt0pnFI= github.com/opencontainers/image-spec v1.1.0-rc5/go.mod h1:X4pATf0uXsnn3g5aiGIsVnJBR4mxhKzfwmvK/B2NTm8= -github.com/opencontainers/runc v1.1.10 h1:EaL5WeO9lv9wmS6SASjszOeQdSctvpbu0DdBQBizE40= -github.com/opencontainers/runc v1.1.10/go.mod h1:+/R6+KmDlh+hOO8NkjmgkG9Qzvypzk0yXxAPYYR65+M= +github.com/opencontainers/runc v1.1.11 h1:9LjxyVlE0BPMRP2wuQDRlHV4941Jp9rc3F0+YKimopA= +github.com/opencontainers/runc v1.1.11/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc h1:d2hUh5O6MRBvStV55MQ8we08t42zSTqBbscoQccWmMc= diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go index a0e780749..783566d68 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go @@ -170,6 +170,10 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { return err } stats.MemoryStats.SwapUsage = swapUsage + stats.MemoryStats.SwapOnlyUsage = cgroups.MemoryData{ + Usage: swapUsage.Usage - memoryUsage.Usage, + Failcnt: swapUsage.Failcnt - memoryUsage.Failcnt, + } kernelUsage, err := getMemoryData(path, "kmem") if err != nil { return err diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go index 9cca98c4c..01fe7d8e1 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go @@ -100,7 +100,7 @@ func statMemory(dirPath string, stats *cgroups.Stats) error { memoryUsage, err := getMemoryDataV2(dirPath, "") if err != nil { if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint { - // The root cgroup does not have memory.{current,max} + // The root cgroup does not have memory.{current,max,peak} // so emulate those using data from /proc/meminfo and // /sys/fs/cgroup/memory.stat return rootStatsFromMeminfo(stats) @@ -108,10 +108,12 @@ func statMemory(dirPath string, stats *cgroups.Stats) error { return err } stats.MemoryStats.Usage = memoryUsage - swapUsage, err := getMemoryDataV2(dirPath, "swap") + swapOnlyUsage, err := getMemoryDataV2(dirPath, "swap") if err != nil { return err } + stats.MemoryStats.SwapOnlyUsage = swapOnlyUsage + swapUsage := swapOnlyUsage // As cgroup v1 reports SwapUsage values as mem+swap combined, // while in cgroup v2 swap values do not include memory, // report combined mem+swap for v1 compatibility. @@ -119,6 +121,9 @@ func statMemory(dirPath string, stats *cgroups.Stats) error { if swapUsage.Limit != math.MaxUint64 { swapUsage.Limit += memoryUsage.Limit } + // The `MaxUsage` of mem+swap cannot simply combine mem with + // swap. So set it to 0 for v1 compatibility. + swapUsage.MaxUsage = 0 stats.MemoryStats.SwapUsage = swapUsage return nil @@ -133,6 +138,7 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { } usage := moduleName + ".current" limit := moduleName + ".max" + maxUsage := moduleName + ".peak" value, err := fscommon.GetCgroupParamUint(path, usage) if err != nil { @@ -152,6 +158,14 @@ func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { } memoryData.Limit = value + // `memory.peak` since kernel 5.19 + // `memory.swap.peak` since kernel 6.5 + value, err = fscommon.GetCgroupParamUint(path, maxUsage) + if err != nil && !os.IsNotExist(err) { + return cgroups.MemoryData{}, err + } + memoryData.MaxUsage = value + return memoryData, nil } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go index 40a81dd5a..0d8371b05 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go @@ -78,6 +78,8 @@ type MemoryStats struct { Usage MemoryData `json:"usage,omitempty"` // usage of memory + swap SwapUsage MemoryData `json:"swap_usage,omitempty"` + // usage of swap only + SwapOnlyUsage MemoryData `json:"swap_only_usage,omitempty"` // usage of kernel memory KernelUsage MemoryData `json:"kernel_usage,omitempty"` // usage of kernel TCP memory diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index c1b4a0041..6ebf5ec7b 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -21,9 +21,9 @@ type Rlimit struct { // IDMap represents UID/GID Mappings for User Namespaces. type IDMap struct { - ContainerID int `json:"container_id"` - HostID int `json:"host_id"` - Size int `json:"size"` + ContainerID int64 `json:"container_id"` + HostID int64 `json:"host_id"` + Size int64 `json:"size"` } // Seccomp represents syscall restrictions diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go index 8c02848b7..51fe94074 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go @@ -1,6 +1,10 @@ package configs -import "errors" +import ( + "errors" + "fmt" + "math" +) var ( errNoUIDMap = errors.New("User namespaces enabled, but no uid mappings found.") @@ -16,11 +20,18 @@ func (c Config) HostUID(containerId int) (int, error) { if c.UidMappings == nil { return -1, errNoUIDMap } - id, found := c.hostIDFromMapping(containerId, c.UidMappings) + id, found := c.hostIDFromMapping(int64(containerId), c.UidMappings) if !found { return -1, errNoUserMap } - return id, nil + // If we are a 32-bit binary running on a 64-bit system, it's possible + // the mapped user is too large to store in an int, which means we + // cannot do the mapping. We can't just return an int64, because + // os.Setuid() takes an int. + if id > math.MaxInt { + return -1, fmt.Errorf("mapping for uid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt) + } + return int(id), nil } // Return unchanged id. return containerId, nil @@ -39,11 +50,18 @@ func (c Config) HostGID(containerId int) (int, error) { if c.GidMappings == nil { return -1, errNoGIDMap } - id, found := c.hostIDFromMapping(containerId, c.GidMappings) + id, found := c.hostIDFromMapping(int64(containerId), c.GidMappings) if !found { return -1, errNoGroupMap } - return id, nil + // If we are a 32-bit binary running on a 64-bit system, it's possible + // the mapped user is too large to store in an int, which means we + // cannot do the mapping. We can't just return an int64, because + // os.Setgid() takes an int. + if id > math.MaxInt { + return -1, fmt.Errorf("mapping for gid %d (host id %d) is larger than native integer size (%d)", containerId, id, math.MaxInt) + } + return int(id), nil } // Return unchanged id. return containerId, nil @@ -57,7 +75,7 @@ func (c Config) HostRootGID() (int, error) { // Utility function that gets a host ID for a container ID from user namespace map // if that ID is present in the map. -func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) { +func (c Config) hostIDFromMapping(containerID int64, uMap []IDMap) (int64, bool) { for _, m := range uMap { if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) { hostID := m.HostID + (containerID - m.ContainerID) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c new file mode 100644 index 000000000..84f2c6188 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c @@ -0,0 +1,79 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +/* + * All of the code here is run inside an aync-signal-safe context, so we need + * to be careful to not call any functions that could cause issues. In theory, + * since we are a Go program, there are fewer restrictions in practice, it's + * better to be safe than sorry. + * + * The only exception is exit, which we need to call to make sure we don't + * return into runc. + */ + +void bail(int pipefd, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vdprintf(pipefd, fmt, args); + va_end(args); + + exit(1); +} + +int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd) +{ + char buffer[4096] = { 0 }; + + pid_t child = fork(); + if (child != 0) + return child; + /* in child */ + + /* Join the target userns. */ + int nsfd = open(userns_path, O_RDONLY); + if (nsfd < 0) + bail(errfd, "open userns path %s failed: %m", userns_path); + + int err = setns(nsfd, CLONE_NEWUSER); + if (err < 0) + bail(errfd, "setns %s failed: %m", userns_path); + + close(nsfd); + + /* Pipe the requested file contents. */ + int fd = open(path, O_RDONLY); + if (fd < 0) + bail(errfd, "open %s in userns %s failed: %m", path, userns_path); + + int nread, ntotal = 0; + while ((nread = read(fd, buffer, sizeof(buffer))) != 0) { + if (nread < 0) + bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal); + ntotal += nread; + + int nwritten = 0; + while (nwritten < nread) { + int n = write(outfd, buffer, nread - nwritten); + if (n < 0) + bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m", + nread - nwritten, path, nwritten); + nwritten += n; + } + if (nread != nwritten) + bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten); + } + + close(fd); + close(outfd); + close(errfd); + + /* We must exit here, otherwise we would return into a forked runc. */ + exit(0); +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go new file mode 100644 index 000000000..7a8c2b023 --- /dev/null +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go @@ -0,0 +1,186 @@ +//go:build linux + +package userns + +import ( + "bufio" + "bytes" + "fmt" + "io" + "os" + "unsafe" + + "github.com/opencontainers/runc/libcontainer/configs" + "github.com/sirupsen/logrus" +) + +/* +#include +extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd); +*/ +import "C" + +func parseIdmapData(data []byte) (ms []configs.IDMap, err error) { + scanner := bufio.NewScanner(bytes.NewReader(data)) + for scanner.Scan() { + var m configs.IDMap + line := scanner.Text() + if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil { + return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err) + } + ms = append(ms, m) + } + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("parsing id map failed: %w", err) + } + return ms, nil +} + +// Do something equivalent to nsenter --user= cat , but more +// efficiently. Returns the contents of the requested file from within the user +// namespace. +func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) { + rdr, wtr, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err) + } + defer rdr.Close() + defer wtr.Close() + + errRdr, errWtr, err := os.Pipe() + if err != nil { + return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err) + } + defer errRdr.Close() + defer errWtr.Close() + + cNsPath := C.CString(nsPath) + defer C.free(unsafe.Pointer(cNsPath)) + cPath := C.CString(path) + defer C.free(unsafe.Pointer(cPath)) + + childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd())) + + if childPid < 0 { + return nil, fmt.Errorf("failed to spawn fork for userns") + } else if childPid == 0 { + // this should never happen + panic("runc executing inside fork child -- unsafe state!") + } + + // We are in the parent -- close the write end of the pipe before reading. + wtr.Close() + output, err := io.ReadAll(rdr) + rdr.Close() + if err != nil { + return nil, fmt.Errorf("reading from userns spawn failed: %w", err) + } + + // Ditto for the error pipe. + errWtr.Close() + errOutput, err := io.ReadAll(errRdr) + errRdr.Close() + if err != nil { + return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err) + } + errOutput = bytes.TrimSpace(errOutput) + + // Clean up the child. + child, err := os.FindProcess(int(childPid)) + if err != nil { + return nil, fmt.Errorf("could not find userns spawn process: %w", err) + } + state, err := child.Wait() + if err != nil { + return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err) + } + if !state.Success() { + errStr := string(errOutput) + if errStr == "" { + errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode()) + } + return nil, fmt.Errorf("userns spawn: %s", errStr) + } else if len(errOutput) > 0 { + // We can just ignore weird output in the error pipe if the process + // didn't bail(), but for completeness output for debugging. + logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput)) + } + // The subprocess succeeded, return whatever it wrote to the pipe. + return output, nil +} + +func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) { + var ( + pid int + extra rune + tryFastPath bool + ) + + // nsPath is usually of the form /proc//ns/user, which means that we + // already have a pid that is part of the user namespace and thus we can + // just use the pid to read from /proc//*id_map. + // + // Note that Sscanf doesn't consume the whole input, so we check for any + // trailing data with %c. That way, we can be sure the pattern matched + // /proc/$pid/ns/user _exactly_ iff n === 1. + if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 { + tryFastPath = pid > 0 + } + + for _, mapType := range []struct { + name string + idMap *[]configs.IDMap + }{ + {"uid_map", &uidMap}, + {"gid_map", &gidMap}, + } { + var mapData []byte + + if tryFastPath { + path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name) + data, err := os.ReadFile(path) + if err != nil { + // Do not error out here -- we need to try the slow path if the + // fast path failed. + logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err) + } else { + mapData = data + } + } else { + logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath) + } + + if mapData == nil { + // We have to actually join the namespace if we cannot take the + // fast path. The path is resolved with respect to the child + // process, so just use /proc/self. + data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name) + if err != nil { + return nil, nil, err + } + mapData = data + } + idMap, err := parseIdmapData(mapData) + if err != nil { + return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err) + } + *mapType.idMap = idMap + } + + return uidMap, gidMap, nil +} + +// IsSameMapping returns whether or not the two id mappings are the same. Note +// that if the order of the mappings is different, or a mapping has been split, +// the mappings will be considered different. +func IsSameMapping(a, b []configs.IDMap) bool { + if len(a) != len(b) { + return false + } + for idx := range a { + if a[idx] != b[idx] { + return false + } + } + return true +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 553b35f1b..ce4cb70f7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -503,7 +503,7 @@ github.com/opencontainers/go-digest/digestset ## explicit; go 1.18 github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.1.10 +# github.com/opencontainers/runc v1.1.11 ## explicit; go 1.17 github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/cgroups