sandbox/cgroups: don't constrain if using SandboxCgroupOnly
When SandboxCgroupOnly is set, we are expected to simply inherit our parent's
cgroup settings and to move all Kata threads into that sandbox cgroup. The
initial implementation still adjusted the size of this cgroup; this commit
fixes that.

This commit also makes a couple of functional changes and small refactors,
and adds clarifying comments to some functions.

Fixes: kata-containers#2090

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Eric Ernst committed Oct 30, 2019
1 parent da98191 commit 0871038
Showing 2 changed files with 55 additions and 34 deletions.
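
The behavior described in the commit message can be illustrated with a short, hedged sketch before reading the diffs. The helper name, cgroup path, and PID below are made up for illustration and are not the actual Kata code; only the containerd/cgroups calls that also appear in the diff are used. The idea: always move the Kata/VMM threads into the sandbox cgroup, but only update the cgroup's resource limits when Kata owns the sizing, i.e. when SandboxCgroupOnly is not set.

package main

import (
	"fmt"

	"github.com/containerd/cgroups"
	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// applySandboxCgroup is a hypothetical helper. It loads the sandbox cgroup at
// cgroupPath, moves the given PIDs into it, and only updates the cgroup's
// resource limits when sandboxCgroupOnly is false. With sandboxCgroupOnly set,
// the limits configured by the parent (for example the CRI layer) are
// inherited as-is.
func applySandboxCgroup(cgroupPath string, pids []int, sandboxCgroupOnly bool, res *specs.LinuxResources) error {
	cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(cgroupPath))
	if err != nil {
		return fmt.Errorf("could not load cgroup %v: %v", cgroupPath, err)
	}

	// Move the threads into the sandbox cgroup in both configurations.
	for _, pid := range pids {
		if err := cg.Add(cgroups.Process{Pid: pid}); err != nil {
			return fmt.Errorf("could not add pid %d to cgroup %v: %v", pid, cgroupPath, err)
		}
	}

	if sandboxCgroupOnly {
		// Inherit the parent's settings: do not constrain or resize this cgroup.
		return nil
	}
	return cg.Update(res)
}

func main() {
	// Usage sketch with an illustrative path and PID.
	_ = applySandboxCgroup("/kata_example-sandbox", []int{12345}, true, &specs.LinuxResources{})
}

With SandboxCgroupOnly set, the cgroup's limits stay whatever the parent configured, which is exactly the "inherit our parent's cgroup settings" behavior the commit message calls out.
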
virtcontainers/cgroups.go: 2 additions, 2 deletions
@@ -36,7 +36,7 @@ const cgroupKataPrefix = "kata"
var cgroupsLoadFunc = cgroups.Load
var cgroupsNewFunc = cgroups.New

// V1Constraints returns the cgroups that are compatible with th VC architecture
// V1Constraints returns the cgroups that are compatible with the VC architecture
// and hypervisor, constraints can be applied to these cgroups.
func V1Constraints() ([]cgroups.Subsystem, error) {
root, err := cgroupV1MountPoint()
@@ -51,7 +51,7 @@ func V1Constraints() ([]cgroups.Subsystem, error) {
return cgroupsSubsystems(subsystems)
}

// V1NoConstraints returns the cgroups that are *not* compatible with th VC
// V1NoConstraints returns the cgroups that are *not* compatible with the VC
// architecture and hypervisor, constraints MUST NOT be applied to these cgroups.
func V1NoConstraints() ([]cgroups.Subsystem, error) {
root, err := cgroupV1MountPoint()
virtcontainers/sandbox.go: 53 additions, 32 deletions
@@ -1806,10 +1806,12 @@ func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
return b, nil
}

// updateResources will calculate the resources required for the virtual machine, and
// adjust the virtual machine sizing accordingly. For a given sandbox, it will calculate the
// number of vCPUs required based on the sum of container requests, plus default CPUs for the VM.
// Similar is done for memory. If changes in memory or CPU are made, the VM will be updated and
// the agent will online the applicable CPU and memory.
func (s *Sandbox) updateResources() error {
// the hypervisor.MemorySize is the amount of memory reserved for
// the VM and contaniners without memory limit

if s == nil {
return errors.New("sandbox is nil")
}
@@ -1822,16 +1824,18 @@ func (s *Sandbox) updateResources() error {
// Add default vcpus for sandbox
sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs

sandboxMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
sandboxMemoryByte += s.calculateSandboxMemory()
sandboxMemoryByte := s.calculateSandboxMemory()
// Add default / rsvd memory for sandbox.
sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift

// Update VCPUs
s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs)
if err != nil {
return err
}
// The CPUs were increased, ask agent to online them

// If the CPUs were increased, ask agent to online them
if oldCPUs < newCPUs {
vcpusAdded := newCPUs - oldCPUs
if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
@@ -1848,7 +1852,7 @@ func (s *Sandbox) updateResources() error {
}
s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
//notify the guest kernel about memory hot-add event, before onlining them
// notify the guest kernel about memory hot-add event, before onlining them
s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
return err
@@ -1890,6 +1894,10 @@ func (s *Sandbox) GetHypervisorType() string {
return string(s.config.HypervisorType)
}

// cgroupsUpdate will:
// 1) get the v1constraints cgroup associated with the stored cgroup path
// 2) (re-)add hypervisor vCPU threads to the appropriate cgroup
// 3) If we are managing sandbox cgroup, update the v1constraints cgroup size
func (s *Sandbox) cgroupsUpdate() error {
if s.state.CgroupPath == "" {
s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
@@ -1901,7 +1909,7 @@ func (s *Sandbox) cgroupsUpdate() error {
return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
}

if err := s.constrainHypervisorVCPUs(cgroup); err != nil {
if err := s.constrainHypervisor(cgroup); err != nil {
return err
}

@@ -1910,18 +1918,24 @@ func (s *Sandbox) cgroupsUpdate() error {
return nil
}

resources, err := s.resources()
if err != nil {
return err
}
// If Kata is configured for SandboxCgroupOnly, do not change the constraints on the cgroup.
// We will rely on the values set within the parent cgroup
if !s.config.SandboxCgroupOnly {
resources, err := s.resources()
if err != nil {
return err
}

if err := cgroup.Update(&resources); err != nil {
return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err)
if err := cgroup.Update(&resources); err != nil {
return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err)
}
}

return nil
}

// cgroupsDelete will move the running processes in the sandbox cgroup
// to the parent and then delete the sandbox cgroup
func (s *Sandbox) cgroupsDelete() error {
s.Logger().Debug("Deleting sandbox cgroup")
if s.state.CgroupPath == "" {
@@ -1930,19 +1944,19 @@ func (s *Sandbox) cgroupsDelete() error {
}

var path string
cgroupSubystems := V1NoConstraints
var cgroupSubsystems cgroups.Hierarchy

if s.config.SandboxCgroupOnly {
// Override V1NoConstraints, if SandboxCgroupOnly is enabled
cgroupSubystems = cgroups.V1
cgroupSubsystems = cgroups.V1
path = s.state.CgroupPath
s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
} else {
cgroupSubsystems = V1NoConstraints
path = cgroupNoConstraintsPath(s.state.CgroupPath)
s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
}

sandboxCgroups, err := cgroupsLoadFunc(cgroupSubystems, cgroups.StaticPath(path))
sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
if err == cgroups.ErrCgroupDeleted {
// cgroup already deleted
s.Logger().Warnf("cgroup already deleted: '%s'", err)
@@ -1954,7 +1968,7 @@ func (s *Sandbox) cgroupsDelete() error {
}

// move running process here, that way cgroup can be removed
parent, err := parentCgroup(cgroupSubystems, path)
parent, err := parentCgroup(cgroupSubsystems, path)
if err != nil {
// parent cgroup doesn't exist, that means there are no process running
// and the no constraints cgroup was removed.
@@ -1970,35 +1984,42 @@ func (s *Sandbox) cgroupsDelete() error {
return sandboxCgroups.Delete()
}

func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
// constrainHypervisor will place the VMM and vCPU threads into cgroups.
func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
pids := s.hypervisor.getPids()
if len(pids) == 0 || pids[0] == 0 {
return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
}

// Move hypervisor into cgroups without constraints,
// those cgroups are not yet supported.
resources := &specs.LinuxResources{}
path := cgroupNoConstraintsPath(s.state.CgroupPath)
noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
if err != nil {
return fmt.Errorf("Could not create cgroup %v: %v", path, err)

// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup
vmmCgroup := cgroup
if !s.config.SandboxCgroupOnly {
// Move the VMM into cgroups without constraints, those cgroups are not yet supported.
resources := &specs.LinuxResources{}
path := cgroupNoConstraintsPath(s.state.CgroupPath)
var err error
vmmCgroup, err = cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
if err != nil {
return fmt.Errorf("Could not create cgroup %v: %v", path, err)
}
}

for _, pid := range pids {
if pid <= 0 {
s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
continue
}

if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err)
if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
}

}

// when new container joins, new CPU could be hotplugged, so we
// have to query fresh vcpu info from hypervisor for every time.
// have to query fresh vcpu info from hypervisor every time.
tids, err := s.hypervisor.getThreadIDs()
if err != nil {
return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
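
As a side note on the updateResources comment added in this diff, the sizing rule it describes can be summarized with a small, hedged sketch. The function and parameter names below are illustrative, not Kata's; the real code pulls these values from the hypervisor config and the container resource specs.

package main

import "fmt"

// sandboxSizing is a hypothetical helper mirroring the rule described in the
// updateResources comment above: total vCPUs are the sum of the per-container
// CPU requests plus the VM's default vCPUs, and total memory is the sum of the
// per-container memory limits plus the VM's default (reserved) memory.
func sandboxSizing(defaultVCPUs uint32, defaultMemMiB uint32,
	containerVCPUs []uint32, containerMemBytes []int64) (uint32, int64) {

	vcpus := defaultVCPUs
	for _, c := range containerVCPUs {
		vcpus += c
	}

	memBytes := int64(defaultMemMiB) << 20 // MiB to bytes, the same shift MibToBytesShift performs
	for _, m := range containerMemBytes {
		memBytes += m
	}
	return vcpus, memBytes
}

func main() {
	// Example: 1 default vCPU and 2048 MiB default memory, plus two containers.
	vcpus, mem := sandboxSizing(1, 2048, []uint32{2, 1}, []int64{512 << 20, 256 << 20})
	fmt.Printf("sandbox needs %d vCPUs and %d bytes of memory\n", vcpus, mem)
}

The real implementation then resizes the VM (resizeVCPUs is visible in the hunks above) and asks the agent to online the added CPUs and memory.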
