diff --git a/virtcontainers/container.go b/virtcontainers/container.go index 37117d7e38..57a67fd485 100644 --- a/virtcontainers/container.go +++ b/virtcontainers/container.go @@ -1358,7 +1358,7 @@ func (c *Container) hotplugDrive() error { c.rootfsSuffix = "" } // If device mapper device, then fetch the full path of the device - devicePath, fsType, err = utils.GetDevicePathAndFsType(dev.mountPoint) + devicePath, fsType, _, err = utils.GetDevicePathAndFsTypeOptions(dev.mountPoint) if err != nil { return err } diff --git a/virtcontainers/device/config/pmem.go b/virtcontainers/device/config/pmem.go index 1b4b80f2b5..c4f723cf7f 100644 --- a/virtcontainers/device/config/pmem.go +++ b/virtcontainers/device/config/pmem.go @@ -75,7 +75,7 @@ func PmemDeviceInfo(source, destination string) (*DeviceInfo, error) { return nil, fmt.Errorf("backing file %v has not PFN signature", device.HostPath) } - _, fstype, err := utils.GetDevicePathAndFsType(source) + _, fstype, _, err := utils.GetDevicePathAndFsTypeOptions(source) if err != nil { pmemLog.WithError(err).WithField("mount-point", source).Warn("failed to get fstype: using ext4") fstype = "ext4" diff --git a/virtcontainers/kata_agent.go b/virtcontainers/kata_agent.go index 934b1bee82..4d60a61a58 100644 --- a/virtcontainers/kata_agent.go +++ b/virtcontainers/kata_agent.go @@ -18,6 +18,7 @@ import ( "syscall" "time" + units "github.com/docker/go-units" "github.com/gogo/protobuf/proto" aTypes "github.com/kata-containers/agent/pkg/types" kataclient "github.com/kata-containers/agent/protocols/client" @@ -33,6 +34,7 @@ import ( "github.com/kata-containers/runtime/virtcontainers/pkg/uuid" "github.com/kata-containers/runtime/virtcontainers/store" "github.com/kata-containers/runtime/virtcontainers/types" + "github.com/kata-containers/runtime/virtcontainers/utils" "github.com/opencontainers/runtime-spec/specs-go" opentracing "github.com/opentracing/opentracing-go" "github.com/sirupsen/logrus" @@ -1429,6 +1431,10 @@ func (k *kataAgent) createContainer(sandbox *Sandbox, c *Container) (p *Process, epheStorages := k.handleEphemeralStorage(ociSpec.Mounts) ctrStorages = append(ctrStorages, epheStorages...) + k.Logger().WithField("ociSpec Hugepage Resources", ociSpec.Linux.Resources.HugepageLimits).Debug("ociSpec HugepageLimit") + hugepages := k.handleHugepages(ociSpec.Mounts, ociSpec.Linux.Resources.HugepageLimits) + ctrStorages = append(ctrStorages, hugepages...) + localStorages := k.handleLocalStorage(ociSpec.Mounts, sandbox.id, c.rootfsSuffix) ctrStorages = append(ctrStorages, localStorages...) @@ -1550,6 +1556,64 @@ func (k *kataAgent) handleEphemeralStorage(mounts []specs.Mount) []*grpc.Storage return epheStorages } +// handleHugePages handles hugepages storage by +// creating a Storage from corresponding source of the mount point +func (k *kataAgent) handleHugepages(mounts []specs.Mount, hugepageLimits []specs.LinuxHugepageLimit) []*grpc.Storage { + //Map to hold the total memory of each type of hugepages + optionsMap := make(map[string]string) + var HugePageSizeUnitList = []string{"B", "K", "M", "G", "T", "P"} + + for _, hp := range hugepageLimits { + if hp.Limit != 0 { + k.Logger().WithFields(logrus.Fields{ + "Pagesize": hp.Pagesize, + "Limit": hp.Limit, + }).Info("hugepage request") + //example Pagesize 2MB, 1GB etc. The Limit are in Bytes + pageSize, _ := units.RAMInBytes(hp.Pagesize) + pageSizeStr := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList) + totalHpSize := fmt.Sprintf("%v", hp.Limit) + optionsMap[pageSizeStr] = totalHpSize + } + } + + var hugepages []*grpc.Storage + for idx, mnt := range mounts { + if mnt.Type != KataLocalDevType { + continue + } + //HugePages mount Type is Local + if _, fsType, fsOptions, _ := utils.GetDevicePathAndFsTypeOptions(mnt.Source); fsType == "hugetlbfs" { + k.Logger().WithField("fsOptions", fsOptions).Debug("hugepage mount options") + //Find the pagesize from the mountpoint to use the right set of options + //pagesize is the last element in the mount option. Can it change ? + pagesizeOpt := fsOptions[len(fsOptions)-1] + //Create mount option string + pageSize, _ := units.RAMInBytes(strings.TrimPrefix(pagesizeOpt, "pagesize=")) + pageSizeStr := units.CustomSize("%g%s", float64(pageSize), 1024.0, HugePageSizeUnitList) + options := "pagesize=" + pageSizeStr + "," + "size=" + optionsMap[pageSizeStr] + k.Logger().WithField("Hugepage options string", options).Debug("hugepage mount options") + // Set the mount source path to a path that resides inside the VM + mounts[idx].Source = filepath.Join(ephemeralPath(), filepath.Base(mnt.Source)) + // Set the mount type to "bind" + mounts[idx].Type = "bind" + + // Create a storage struct so that kata agent is able to create + // hugetlbfs backed volume inside the VM + hugepage := &grpc.Storage{ + Driver: KataEphemeralDevType, + Source: "nodev", + Fstype: "hugetlbfs", + MountPoint: mounts[idx].Source, + Options: []string{options}, + } + hugepages = append(hugepages, hugepage) + } + + } + return hugepages +} + // handleLocalStorage handles local storage within the VM // by creating a directory in the VM from the source of the mount point. func (k *kataAgent) handleLocalStorage(mounts []specs.Mount, sandboxID string, rootfsSuffix string) []*grpc.Storage { diff --git a/virtcontainers/kata_agent_test.go b/virtcontainers/kata_agent_test.go index 0041184fa0..6b64696b7d 100644 --- a/virtcontainers/kata_agent_test.go +++ b/virtcontainers/kata_agent_test.go @@ -408,6 +408,61 @@ func TestHandleLocalStorage(t *testing.T) { assert.Equal(t, localMountPoint, expected) } +func TestHandleHugepages(t *testing.T) { + if os.Getuid() != 0 { + t.Skip("Test disabled as requires root user") + } + + dir, err := ioutil.TempDir("", "hugepages-test") + assert.Nil(t, err) + defer os.RemoveAll(dir) + + k := kataAgent{} + var mounts []specs.Mount + var hugepageLimits []specs.LinuxHugepageLimit + + hugepageDirs := [2]string{"hugepages-1Gi", "hugepages-2Mi"} + options := [2]string{"pagesize=1024M", "pagesize=2M"} + + for i := 0; i < 2; i++ { + target := path.Join(dir, hugepageDirs[i]) + err := os.MkdirAll(target, 0777) + if err = syscall.Mount("nodev", target, "hugetlbfs", uintptr(0), options[i]); err != nil { + fmt.Printf("Unable to mount %s: %v\n", target, err) + } + assert.Nil(t, err) + defer syscall.Unmount(target, 0) + defer os.RemoveAll(target) + mount := specs.Mount{ + Type: KataLocalDevType, + Source: target, + } + mounts = append(mounts, mount) + + } + fmt.Printf("mounts: %v\n", mounts) + + hugepageLimits = []specs.LinuxHugepageLimit{ + { + Pagesize: "1GB", + Limit: 322122547, + }, + { + Pagesize: "2MB", + Limit: 134217728, + }, + } + fmt.Printf("hugepageLimits: %v\n", hugepageLimits) + + hugepages := k.handleHugepages(mounts, hugepageLimits) + + fmt.Printf("%v\n", hugepages) + + assert.NotNil(t, hugepages) + assert.Equal(t, len(hugepages), 2) + +} + func TestHandleDeviceBlockVolume(t *testing.T) { k := kataAgent{} diff --git a/virtcontainers/mount.go b/virtcontainers/mount.go index 2f9846e809..f19690992d 100644 --- a/virtcontainers/mount.go +++ b/virtcontainers/mount.go @@ -400,7 +400,7 @@ func IsEphemeralStorage(path string) bool { return false } - if _, fsType, _ := utils.GetDevicePathAndFsType(path); fsType == "tmpfs" { + if _, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path); fsType == "tmpfs" { return true } @@ -415,7 +415,7 @@ func Isk8sHostEmptyDir(path string) bool { return false } - if _, fsType, _ := utils.GetDevicePathAndFsType(path); fsType != "tmpfs" { + if _, fsType, _, _ := utils.GetDevicePathAndFsTypeOptions(path); fsType != "tmpfs" { return true } return false diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 5613e35db7..9f0a2f45af 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1872,7 +1872,10 @@ func (s *Sandbox) updateResources() error { // Add default vcpus for sandbox sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs - sandboxMemoryByte := s.calculateSandboxMemory() + sandboxMemoryByte, err := s.calculateSandboxMemory() + if err != nil { + return err + } // Add default / rsvd memory for sandbox. sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift @@ -1912,7 +1915,7 @@ func (s *Sandbox) updateResources() error { return nil } -func (s *Sandbox) calculateSandboxMemory() int64 { +func (s *Sandbox) calculateSandboxMemory() (int64, error) { memorySandbox := int64(0) for _, c := range s.config.Containers { // Do not hot add again non-running containers resources @@ -1923,9 +1926,30 @@ func (s *Sandbox) calculateSandboxMemory() int64 { if m := c.Resources.Memory; m != nil && m.Limit != nil { memorySandbox += *m.Limit + s.Logger().WithField("memory limit", memorySandbox).Info("Memory Sandbox + Memory Limit ") + } + + //Add hugepages memory + //HugepageLimit is uint64 - https://github.com/opencontainers/runtime-spec/blob/master/specs-go/config.go#L242 + for _, l := range c.Resources.HugepageLimits { + if l.Limit <= math.MaxInt64 { + memorySandbox += int64(l.Limit) + } else { + //Overflow + s.Logger().WithField("hugepages memory limit overflow", l.Limit).Warn("Memory Sandbox + Hugepages Memory Limit ") + return 0, errors.New("Integer overflow") + } + + s.Logger().WithField("hugepages memory limit", memorySandbox).Info("Memory Sandbox + Hugepages Memory Limit ") } } - return memorySandbox + + //Check overflow + if memorySandbox < 0 { + return 0, errors.New("Integer overflow") + } else { + return memorySandbox, nil + } } func (s *Sandbox) calculateSandboxCPUs() (uint32, error) { diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 69c2131b06..12f19b0c43 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -17,6 +17,7 @@ import ( "sync" "syscall" "testing" + "math" ktu "github.com/kata-containers/runtime/pkg/katatestutils" "github.com/kata-containers/runtime/virtcontainers/device/config" @@ -167,12 +168,90 @@ func TestCalculateSandboxMem(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { sandbox.config.Containers = tt.containers - got := sandbox.calculateSandboxMemory() + got, err := sandbox.calculateSandboxMemory() assert.Equal(t, got, tt.want) + assert.NoError(t, err) }) } } +func TestSandboxHugepageLimit(t *testing.T) { + contConfig1 := newTestContainerConfigNoop("cont-00001") + contConfig2 := newTestContainerConfigNoop("cont-00002") + limit := int64(4000) + contConfig1.Resources.Memory = &specs.LinuxMemory{Limit: &limit} + contConfig2.Resources.Memory = &specs.LinuxMemory{Limit: &limit} + hConfig := newHypervisorConfig(nil, nil) + + defer cleanUp() + // create a sandbox + s, err := testCreateSandbox(t, + testSandboxID, + MockHypervisor, + hConfig, + NoopAgentType, + NetworkConfig{}, + []ContainerConfig{contConfig1, contConfig2}, + nil) + + assert.NoError(t, err) + + hugepageLimits := []specs.LinuxHugepageLimit{ + { + Pagesize: "1GB", + Limit: 322122547, + }, + { + Pagesize: "2MB", + Limit: 134217728, + }, + } + + for i := range s.config.Containers { + s.config.Containers[i].Resources.HugepageLimits = hugepageLimits + } + err = s.updateResources() + assert.NoError(t, err) +} + + +func TestSandboxHugepageLimitOverflow(t *testing.T) { + contConfig1 := newTestContainerConfigNoop("cont-00001") + contConfig2 := newTestContainerConfigNoop("cont-00002") + hConfig := newHypervisorConfig(nil, nil) + + defer cleanUp() + // create a sandbox + s, err := testCreateSandbox(t, + testSandboxID, + MockHypervisor, + hConfig, + NoopAgentType, + NetworkConfig{}, + []ContainerConfig{contConfig1, contConfig2}, + nil) + + assert.NoError(t, err) + + hugepageLimits := []specs.LinuxHugepageLimit{ + { + Pagesize: "1GB", + Limit: math.MaxUint64, + }, + { + Pagesize: "2MB", + Limit: math.MaxUint64, + }, + } + + for i := range s.config.Containers { + s.config.Containers[i].Resources.HugepageLimits = hugepageLimits + } + err = s.updateResources() + assert.Error(t, err) +} + + func TestCreateSandboxEmptyID(t *testing.T) { hConfig := newHypervisorConfig(nil, nil) _, err := testCreateSandbox(t, "", MockHypervisor, hConfig, NoopAgentType, NetworkConfig{}, nil, nil) diff --git a/virtcontainers/utils/utils_linux.go b/virtcontainers/utils/utils_linux.go index ad870d63ee..b24be33ddc 100644 --- a/virtcontainers/utils/utils_linux.go +++ b/virtcontainers/utils/utils_linux.go @@ -99,11 +99,12 @@ const ( procDeviceIndex = iota procPathIndex procTypeIndex + procOptionIndex ) -// GetDevicePathAndFsType gets the device for the mount point and the file system type -// of the mount. -func GetDevicePathAndFsType(mountPoint string) (devicePath, fsType string, err error) { +// GetDevicePathAndFsTypeOptions gets the device for the mount point, the file system type +// and mount options +func GetDevicePathAndFsTypeOptions(mountPoint string) (devicePath, fsType string, fsOptions []string, err error) { if mountPoint == "" { err = fmt.Errorf("Mount point cannot be empty") return @@ -137,6 +138,7 @@ func GetDevicePathAndFsType(mountPoint string) (devicePath, fsType string, err e if mountPoint == fields[procPathIndex] { devicePath = fields[procDeviceIndex] fsType = fields[procTypeIndex] + fsOptions = strings.Split(fields[procOptionIndex], ",") return } } diff --git a/virtcontainers/utils/utils_linux_test.go b/virtcontainers/utils/utils_linux_test.go index 4554fa935d..30ab6fa893 100644 --- a/virtcontainers/utils/utils_linux_test.go +++ b/virtcontainers/utils/utils_linux_test.go @@ -36,16 +36,18 @@ func TestFindContextID(t *testing.T) { func TestGetDevicePathAndFsTypeEmptyMount(t *testing.T) { assert := assert.New(t) - _, _, err := GetDevicePathAndFsType("") + _, _, _, err := GetDevicePathAndFsTypeOptions("") assert.Error(err) } func TestGetDevicePathAndFsTypeSuccessful(t *testing.T) { assert := assert.New(t) - path, fstype, err := GetDevicePathAndFsType("/proc") + path, fstype, fsOptions, err := GetDevicePathAndFsTypeOptions("/proc") assert.NoError(err) assert.Equal(path, "proc") assert.Equal(fstype, "proc") + assert.ElementsMatch(fsOptions, "rw nosuid nodev noexec relatime") } +