diff --git a/cli/config/configuration-qemu.toml.in b/cli/config/configuration-qemu.toml.in index d87e5269ff..dbbf80ddce 100644 --- a/cli/config/configuration-qemu.toml.in +++ b/cli/config/configuration-qemu.toml.in @@ -89,6 +89,12 @@ default_memory = @DEFMEMSZ@ # Default 0 #memory_offset = 0 +# Specifies virtio-mem will be enabled or not. +# Please note that this option should be used with the command +# "echo 1 > /proc/sys/vm/overcommit_memory". +# Default false +#enable_virtio_mem = true + # Disable block device from being used for a container's rootfs. # In case of a storage driver like devicemapper where a container's # root file system is backed by a block device, the block device is passed diff --git a/pkg/katautils/config-settings.go b/pkg/katautils/config-settings.go index 17e6141c29..9bae10f7ee 100644 --- a/pkg/katautils/config-settings.go +++ b/pkg/katautils/config-settings.go @@ -27,6 +27,7 @@ const defaultMaxVCPUCount uint32 = 0 const defaultMemSize uint32 = 2048 // MiB const defaultMemSlots uint32 = 10 const defaultMemOffset uint32 = 0 // MiB +const defaultVirtioMem bool = false const defaultBridgesCount uint32 = 1 const defaultInterNetworkingModel = "tcfilter" const defaultDisableBlockDeviceUse bool = false diff --git a/pkg/katautils/config.go b/pkg/katautils/config.go index 9badc68203..30b89d75e8 100644 --- a/pkg/katautils/config.go +++ b/pkg/katautils/config.go @@ -109,6 +109,7 @@ type hypervisor struct { MemorySize uint32 `toml:"default_memory"` MemSlots uint32 `toml:"memory_slots"` MemOffset uint32 `toml:"memory_offset"` + VirtioMem bool `toml:"enable_virtio_mem"` DefaultBridges uint32 `toml:"default_bridges"` Msize9p uint32 `toml:"msize_9p"` DisableBlockDeviceUse bool `toml:"disable_block_device_use"` @@ -623,6 +624,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), MemOffset: h.defaultMemOffset(), + VirtioMem: h.VirtioMem, EntropySource: h.GetEntropySource(), DefaultBridges: h.defaultBridges(), DisableBlockDeviceUse: h.DisableBlockDeviceUse, @@ -773,6 +775,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) { MemorySize: h.defaultMemSz(), MemSlots: h.defaultMemSlots(), MemOffset: h.defaultMemOffset(), + VirtioMem: h.VirtioMem, EntropySource: h.GetEntropySource(), DefaultBridges: h.defaultBridges(), DisableBlockDeviceUse: h.DisableBlockDeviceUse, @@ -1054,6 +1057,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig { DefaultMaxVCPUs: defaultMaxVCPUCount, MemorySize: defaultMemSize, MemOffset: defaultMemOffset, + VirtioMem: defaultVirtioMem, DisableBlockDeviceUse: defaultDisableBlockDeviceUse, DefaultBridges: defaultBridgesCount, MemPrealloc: defaultEnableMemPrealloc, diff --git a/virtcontainers/hypervisor.go b/virtcontainers/hypervisor.go index e376d0ccd3..ebdd5170d3 100644 --- a/virtcontainers/hypervisor.go +++ b/virtcontainers/hypervisor.go @@ -239,6 +239,9 @@ type HypervisorConfig struct { // MemOffset specifies memory space for nvdimm device MemOffset uint32 + // VirtioMem is used to enable/disable virtio-mem + VirtioMem bool + // VirtioFSCacheSize is the DAX cache size in MiB VirtioFSCacheSize uint32 diff --git a/virtcontainers/persist.go b/virtcontainers/persist.go index f964fac02e..47a5c10383 100644 --- a/virtcontainers/persist.go +++ b/virtcontainers/persist.go @@ -214,6 +214,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { Msize9p: sconfig.HypervisorConfig.Msize9p, MemSlots: sconfig.HypervisorConfig.MemSlots, MemOffset: sconfig.HypervisorConfig.MemOffset, + VirtioMem: sconfig.HypervisorConfig.VirtioMem, VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize, KernelPath: sconfig.HypervisorConfig.KernelPath, ImagePath: sconfig.HypervisorConfig.ImagePath, @@ -499,6 +500,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { Msize9p: hconf.Msize9p, MemSlots: hconf.MemSlots, MemOffset: hconf.MemOffset, + VirtioMem: hconf.VirtioMem, VirtioFSCacheSize: hconf.VirtioFSCacheSize, KernelPath: hconf.KernelPath, ImagePath: hconf.ImagePath, diff --git a/virtcontainers/persist/api/config.go b/virtcontainers/persist/api/config.go index 50e51d9b67..ca2728b31c 100644 --- a/virtcontainers/persist/api/config.go +++ b/virtcontainers/persist/api/config.go @@ -35,6 +35,9 @@ type HypervisorConfig struct { // MemOffset specifies memory space for nvdimm device MemOffset uint32 + // VirtioMem is used to enable/disable virtio-mem + VirtioMem bool + // VirtioFSCacheSize is the DAX cache size in MiB VirtioFSCacheSize uint32 diff --git a/virtcontainers/pkg/annotations/annotations.go b/virtcontainers/pkg/annotations/annotations.go index e06b071eaa..3be3aa1866 100644 --- a/virtcontainers/pkg/annotations/annotations.go +++ b/virtcontainers/pkg/annotations/annotations.go @@ -124,6 +124,9 @@ const ( // MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. MemOffset = kataAnnotHypervisorPrefix + "memory_offset" + // VirtioMem is a sandbox annotation that is used to enable/disable virtio-mem. + VirtioMem = kataAnnotHypervisorPrefix + "enable_virtio_mem" + // MemPrealloc is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor. MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc" diff --git a/virtcontainers/pkg/oci/utils.go b/virtcontainers/pkg/oci/utils.go index 4cf6d5f100..19d2f5e1fd 100644 --- a/virtcontainers/pkg/oci/utils.go +++ b/virtcontainers/pkg/oci/utils.go @@ -492,6 +492,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig } } + if value, ok := ocispec.Annotations[vcAnnotations.VirtioMem]; ok { + virtioMem, err := strconv.ParseBool(value) + if err != nil { + return fmt.Errorf("Error parsing annotation for enable_virtio_mem: Please specify boolean value 'true|false'") + } + + sbConfig.HypervisorConfig.VirtioMem = virtioMem + } + if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok { memPrealloc, err := strconv.ParseBool(value) if err != nil { diff --git a/virtcontainers/pkg/oci/utils_test.go b/virtcontainers/pkg/oci/utils_test.go index 49976fcf52..792ed37cc4 100644 --- a/virtcontainers/pkg/oci/utils_test.go +++ b/virtcontainers/pkg/oci/utils_test.go @@ -741,6 +741,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024" ocispec.Annotations[vcAnnotations.MemSlots] = "20" ocispec.Annotations[vcAnnotations.MemOffset] = "512" + ocispec.Annotations[vcAnnotations.VirtioMem] = "true" ocispec.Annotations[vcAnnotations.MemPrealloc] = "true" ocispec.Annotations[vcAnnotations.EnableSwap] = "true" ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm" @@ -770,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) { assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024)) assert.Equal(config.HypervisorConfig.MemSlots, uint32(20)) assert.Equal(config.HypervisorConfig.MemOffset, uint32(512)) + assert.Equal(config.HypervisorConfig.VirtioMem, true) assert.Equal(config.HypervisorConfig.MemPrealloc, true) assert.Equal(config.HypervisorConfig.Mlock, false) assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm") diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index 22a1d274ea..1c06d2eb42 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -668,6 +668,56 @@ func (q *qemu) setupVirtiofsd() (err error) { return err } +func (q *qemu) getMemArgs() (bool, string, string) { + share := false + target := "" + memoryBack := "memory-backend-ram" + + if q.qemuConfig.Knobs.HugePages { + // we are setting all the bits that govmm sets when hugepages are enabled. + // https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677 + target = "/dev/hugepages" + memoryBack = "memory-backend-file" + share = true + } else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" { + target = q.qemuConfig.Memory.Path + memoryBack = "memory-backend-file" + } + if q.qemuConfig.Knobs.MemShared { + share = true + } + + return share, target, memoryBack +} + +func (q *qemu) setupVirtioMem() error { + maxMem, err := q.hostMemMB() + if err != nil { + return err + } + // 1024 is size for nvdimm + sizeMB := int(maxMem) - int(q.config.MemorySize) + + share, target, memoryBack := q.getMemArgs() + err = q.qmpSetup() + if err != nil { + return err + } + err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0") + if err == nil { + q.config.VirtioMem = true + q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB) + } else { + help := "" + if strings.Index(err.Error(), "Cannot allocate memory") != -1 { + help = ". Please use command \"echo 1 > /proc/sys/vm/overcommit_memory\" handle it." + } + err = fmt.Errorf("Add %dMB virtio-mem-pci fail %s%s", sizeMB, err.Error(), help) + } + + return err +} + // startSandbox will start the Sandbox's VM. func (q *qemu) startSandbox(timeout int) error { span, _ := q.trace("startSandbox") @@ -744,6 +794,10 @@ func (q *qemu) startSandbox(timeout int) error { } } + if q.config.VirtioMem { + err = q.setupVirtioMem() + } + return err } @@ -1449,9 +1503,6 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) { func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) { memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx) - share := false - target := "" - memoryBack := "memory-backend-ram" if err != nil { return 0, fmt.Errorf("failed to query memory devices: %v", err) } @@ -1465,19 +1516,8 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) { } memDev.slot = maxSlot + 1 } - if q.qemuConfig.Knobs.HugePages { - // we are setting all the bits that govmm sets when hugepages are enabled. - // https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677 - target = "/dev/hugepages" - memoryBack = "memory-backend-file" - share = true - } else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" { - target = q.qemuConfig.Memory.Path - memoryBack = "memory-backend-file" - } - if q.qemuConfig.Knobs.MemShared { - share = true - } + + share, target, memoryBack := q.getMemArgs() err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share) if err != nil { q.Logger().WithError(err).Error("hotplug memory") @@ -1661,6 +1701,17 @@ func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe boo return 0, memoryDevice{}, err } var addMemDevice memoryDevice + if q.config.VirtioMem && currentMemory != reqMemMB { + q.Logger().WithField("hotplug", "memory").Debugf("resize memory from %dMB to %dMB", currentMemory, reqMemMB) + sizeByte := (reqMemMB - q.config.MemorySize) * 1024 * 1024 + err = q.qmpMonitorCh.qmp.ExecQomSet(q.qmpMonitorCh.ctx, "virtiomem0", "requested-size", uint64(sizeByte)) + if err != nil { + return 0, memoryDevice{}, err + } + q.state.HotpluggedMemory = int(sizeByte / 1024 / 1024) + return reqMemMB, memoryDevice{}, nil + } + switch { case currentMemory < reqMemMB: //hotplug diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index ce5ec68223..8b9b7abb8c 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -1764,6 +1764,7 @@ func (s *Sandbox) calculateSandboxMemory() int64 { if m := c.Resources.Memory; m != nil && m.Limit != nil { memorySandbox += *m.Limit + s.Logger().WithField("container-id", c.ID).Debugf("jojo size %d", memorySandbox / 1024/1024) } } return memorySandbox