Skip to content

Commit

Permalink
qemu: Add virtio-mem support
Browse files Browse the repository at this point in the history
This commit add qemu virtio-mem support.
Then qemu can use virtio-mem support memory resize.

To enable this function, need the Linux and the qemu that support
virtio-mem.
Use command "echo 1 > /proc/sys/vm/overcommit_memory" to enable memory
overcommitment of Linux kernel.  Because qemu virtio-mem device need
allocate a lot of memory.
Set "enable_virtio_mem" of kata configuration to true.

Fixes: kata-containers#2406

Signed-off-by: Hui Zhu <teawater@antfin.com>
  • Loading branch information
teawater committed Jan 22, 2020
1 parent b337428 commit 1b45389
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 16 deletions.
6 changes: 6 additions & 0 deletions cli/config/configuration-qemu.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ default_memory = @DEFMEMSZ@
# Default 0
#memory_offset = 0

# Specifies virtio-mem will be enabled or not.
# Please note that this option should be used with the command
# "echo 1 > /proc/sys/vm/overcommit_memory".
# Default false
#enable_virtio_mem = true

# Disable block device from being used for a container's rootfs.
# In case of a storage driver like devicemapper where a container's
# root file system is backed by a block device, the block device is passed
Expand Down
1 change: 1 addition & 0 deletions pkg/katautils/config-settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const defaultMaxVCPUCount uint32 = 0
const defaultMemSize uint32 = 2048 // MiB
const defaultMemSlots uint32 = 10
const defaultMemOffset uint32 = 0 // MiB
const defaultVirtioMem bool = false
const defaultBridgesCount uint32 = 1
const defaultInterNetworkingModel = "tcfilter"
const defaultDisableBlockDeviceUse bool = false
Expand Down
4 changes: 4 additions & 0 deletions pkg/katautils/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ type hypervisor struct {
MemorySize uint32 `toml:"default_memory"`
MemSlots uint32 `toml:"memory_slots"`
MemOffset uint32 `toml:"memory_offset"`
VirtioMem bool `toml:"enable_virtio_mem"`
DefaultBridges uint32 `toml:"default_bridges"`
Msize9p uint32 `toml:"msize_9p"`
DisableBlockDeviceUse bool `toml:"disable_block_device_use"`
Expand Down Expand Up @@ -623,6 +624,7 @@ func newQemuHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
Expand Down Expand Up @@ -773,6 +775,7 @@ func newClhHypervisorConfig(h hypervisor) (vc.HypervisorConfig, error) {
MemorySize: h.defaultMemSz(),
MemSlots: h.defaultMemSlots(),
MemOffset: h.defaultMemOffset(),
VirtioMem: h.VirtioMem,
EntropySource: h.GetEntropySource(),
DefaultBridges: h.defaultBridges(),
DisableBlockDeviceUse: h.DisableBlockDeviceUse,
Expand Down Expand Up @@ -1054,6 +1057,7 @@ func GetDefaultHypervisorConfig() vc.HypervisorConfig {
DefaultMaxVCPUs: defaultMaxVCPUCount,
MemorySize: defaultMemSize,
MemOffset: defaultMemOffset,
VirtioMem: defaultVirtioMem,
DisableBlockDeviceUse: defaultDisableBlockDeviceUse,
DefaultBridges: defaultBridgesCount,
MemPrealloc: defaultEnableMemPrealloc,
Expand Down
3 changes: 3 additions & 0 deletions virtcontainers/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ type HypervisorConfig struct {
// MemOffset specifies memory space for nvdimm device
MemOffset uint32

// VirtioMem is used to enable/disable virtio-mem
VirtioMem bool

// VirtioFSCacheSize is the DAX cache size in MiB
VirtioFSCacheSize uint32

Expand Down
2 changes: 2 additions & 0 deletions virtcontainers/persist.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) {
Msize9p: sconfig.HypervisorConfig.Msize9p,
MemSlots: sconfig.HypervisorConfig.MemSlots,
MemOffset: sconfig.HypervisorConfig.MemOffset,
VirtioMem: sconfig.HypervisorConfig.VirtioMem,
VirtioFSCacheSize: sconfig.HypervisorConfig.VirtioFSCacheSize,
KernelPath: sconfig.HypervisorConfig.KernelPath,
ImagePath: sconfig.HypervisorConfig.ImagePath,
Expand Down Expand Up @@ -499,6 +500,7 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) {
Msize9p: hconf.Msize9p,
MemSlots: hconf.MemSlots,
MemOffset: hconf.MemOffset,
VirtioMem: hconf.VirtioMem,
VirtioFSCacheSize: hconf.VirtioFSCacheSize,
KernelPath: hconf.KernelPath,
ImagePath: hconf.ImagePath,
Expand Down
3 changes: 3 additions & 0 deletions virtcontainers/persist/api/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ type HypervisorConfig struct {
// MemOffset specifies memory space for nvdimm device
MemOffset uint32

// VirtioMem is used to enable/disable virtio-mem
VirtioMem bool

// VirtioFSCacheSize is the DAX cache size in MiB
VirtioFSCacheSize uint32

Expand Down
3 changes: 3 additions & 0 deletions virtcontainers/pkg/annotations/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ const (
// MemOffset is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
MemOffset = kataAnnotHypervisorPrefix + "memory_offset"

// VirtioMem is a sandbox annotation that is used to enable/disable virtio-mem.
VirtioMem = kataAnnotHypervisorPrefix + "enable_virtio_mem"

// MemPrealloc is a sandbox annotation that specifies the memory space used for nvdimm device by the hypervisor.
MemPrealloc = kataAnnotHypervisorPrefix + "enable_mem_prealloc"

Expand Down
9 changes: 9 additions & 0 deletions virtcontainers/pkg/oci/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,15 @@ func addHypervisorMemoryOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig
}
}

if value, ok := ocispec.Annotations[vcAnnotations.VirtioMem]; ok {
virtioMem, err := strconv.ParseBool(value)
if err != nil {
return fmt.Errorf("Error parsing annotation for enable_virtio_mem: Please specify boolean value 'true|false'")
}

sbConfig.HypervisorConfig.VirtioMem = virtioMem
}

if value, ok := ocispec.Annotations[vcAnnotations.MemPrealloc]; ok {
memPrealloc, err := strconv.ParseBool(value)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions virtcontainers/pkg/oci/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
ocispec.Annotations[vcAnnotations.DefaultMemory] = "1024"
ocispec.Annotations[vcAnnotations.MemSlots] = "20"
ocispec.Annotations[vcAnnotations.MemOffset] = "512"
ocispec.Annotations[vcAnnotations.VirtioMem] = "true"
ocispec.Annotations[vcAnnotations.MemPrealloc] = "true"
ocispec.Annotations[vcAnnotations.EnableSwap] = "true"
ocispec.Annotations[vcAnnotations.FileBackedMemRootDir] = "/dev/shm"
Expand Down Expand Up @@ -770,6 +771,7 @@ func TestAddHypervisorAnnotations(t *testing.T) {
assert.Equal(config.HypervisorConfig.MemorySize, uint32(1024))
assert.Equal(config.HypervisorConfig.MemSlots, uint32(20))
assert.Equal(config.HypervisorConfig.MemOffset, uint32(512))
assert.Equal(config.HypervisorConfig.VirtioMem, true)
assert.Equal(config.HypervisorConfig.MemPrealloc, true)
assert.Equal(config.HypervisorConfig.Mlock, false)
assert.Equal(config.HypervisorConfig.FileBackedMemRootDir, "/dev/shm")
Expand Down
83 changes: 67 additions & 16 deletions virtcontainers/qemu.go
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,56 @@ func (q *qemu) setupVirtiofsd() (err error) {
return err
}

func (q *qemu) getMemArgs() (bool, string, string) {
share := false
target := ""
memoryBack := "memory-backend-ram"

if q.qemuConfig.Knobs.HugePages {
// we are setting all the bits that govmm sets when hugepages are enabled.
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
target = "/dev/hugepages"
memoryBack = "memory-backend-file"
share = true
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
target = q.qemuConfig.Memory.Path
memoryBack = "memory-backend-file"
}
if q.qemuConfig.Knobs.MemShared {
share = true
}

return share, target, memoryBack
}

func (q *qemu) setupVirtioMem() error {
maxMem, err := q.hostMemMB()
if err != nil {
return err
}
// 1024 is size for nvdimm
sizeMB := int(maxMem) - int(q.config.MemorySize)

share, target, memoryBack := q.getMemArgs()
err = q.qmpSetup()
if err != nil {
return err
}
err = q.qmpMonitorCh.qmp.ExecMemdevAdd(q.qmpMonitorCh.ctx, memoryBack, "virtiomem", target, sizeMB, share, "virtio-mem-pci", "virtiomem0")
if err == nil {
q.config.VirtioMem = true
q.Logger().Infof("Setup %dMB virtio-mem-pci success", sizeMB)
} else {
help := ""
if strings.Index(err.Error(), "Cannot allocate memory") != -1 {
help = ". Please use command \"echo 1 > /proc/sys/vm/overcommit_memory\" handle it."
}
err = fmt.Errorf("Add %dMB virtio-mem-pci fail %s%s", sizeMB, err.Error(), help)
}

return err
}

// startSandbox will start the Sandbox's VM.
func (q *qemu) startSandbox(timeout int) error {
span, _ := q.trace("startSandbox")
Expand Down Expand Up @@ -744,6 +794,10 @@ func (q *qemu) startSandbox(timeout int) error {
}
}

if q.config.VirtioMem {
err = q.setupVirtioMem()
}

return err
}

Expand Down Expand Up @@ -1449,9 +1503,6 @@ func (q *qemu) hotplugMemory(memDev *memoryDevice, op operation) (int, error) {

func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
memoryDevices, err := q.qmpMonitorCh.qmp.ExecQueryMemoryDevices(q.qmpMonitorCh.ctx)
share := false
target := ""
memoryBack := "memory-backend-ram"
if err != nil {
return 0, fmt.Errorf("failed to query memory devices: %v", err)
}
Expand All @@ -1465,19 +1516,8 @@ func (q *qemu) hotplugAddMemory(memDev *memoryDevice) (int, error) {
}
memDev.slot = maxSlot + 1
}
if q.qemuConfig.Knobs.HugePages {
// we are setting all the bits that govmm sets when hugepages are enabled.
// https://github.com/intel/govmm/blob/master/qemu/qemu.go#L1677
target = "/dev/hugepages"
memoryBack = "memory-backend-file"
share = true
} else if q.config.SharedFS == config.VirtioFS || q.config.FileBackedMemRootDir != "" {
target = q.qemuConfig.Memory.Path
memoryBack = "memory-backend-file"
}
if q.qemuConfig.Knobs.MemShared {
share = true
}

share, target, memoryBack := q.getMemArgs()
err = q.qmpMonitorCh.qmp.ExecHotplugMemory(q.qmpMonitorCh.ctx, memoryBack, "mem"+strconv.Itoa(memDev.slot), target, memDev.sizeMB, share)
if err != nil {
q.Logger().WithError(err).Error("hotplug memory")
Expand Down Expand Up @@ -1661,6 +1701,17 @@ func (q *qemu) resizeMemory(reqMemMB uint32, memoryBlockSizeMB uint32, probe boo
return 0, memoryDevice{}, err
}
var addMemDevice memoryDevice
if q.config.VirtioMem && currentMemory != reqMemMB {
q.Logger().WithField("hotplug", "memory").Debugf("resize memory from %dMB to %dMB", currentMemory, reqMemMB)
sizeByte := (reqMemMB - q.config.MemorySize) * 1024 * 1024
err = q.qmpMonitorCh.qmp.ExecQomSet(q.qmpMonitorCh.ctx, "virtiomem0", "requested-size", uint64(sizeByte))
if err != nil {
return 0, memoryDevice{}, err
}
q.state.HotpluggedMemory = int(sizeByte / 1024 / 1024)
return reqMemMB, memoryDevice{}, nil
}

switch {
case currentMemory < reqMemMB:
//hotplug
Expand Down
1 change: 1 addition & 0 deletions virtcontainers/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -1764,6 +1764,7 @@ func (s *Sandbox) calculateSandboxMemory() int64 {

if m := c.Resources.Memory; m != nil && m.Limit != nil {
memorySandbox += *m.Limit
s.Logger().WithField("container-id", c.ID).Debugf("jojo size %d", memorySandbox / 1024/1024)
}
}
return memorySandbox
Expand Down

0 comments on commit 1b45389

Please sign in to comment.