Commit 3308077
sandbox/cgroups: don't constrain if using SandboxCgroupOnly
When SandboxCgroupOnly is set, we are expected to simply inherit our parent's
cgroup settings and to move all Kata threads into that sandbox cgroup. The
initial implementation still adjusted the size of this cgroup; this commit
fixes that.

This commit makes a couple of functional changes, small refactors, and
adds clarifying comments for some functions.
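At its core, the fix is an early-return guard at the top of the cgroup-constraining paths, as the virtcontainers/sandbox.go diff below shows. A condensed sketch of the resulting control flow (only the guard is verbatim; the rest is abbreviated):

    func (s *Sandbox) cgroupsUpdate() error {
        // SandboxCgroupOnly: the sandbox cgroup was inherited from the parent
        // and all Kata threads already live in it, so leave its size alone.
        if s.config.SandboxCgroupOnly {
            return nil
        }
        // Otherwise, as before: load the v1 constraints cgroup, (re-)add
        // hypervisor threads to it, and update its size.
        // ...
        return nil
    }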

Fixes: kata-containers#2090

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Eric Ernst committed Oct 31, 2019
1 parent ee527d3 commit 3308077
Showing 3 changed files with 61 additions and 27 deletions.
virtcontainers/cgroups.go (2 additions & 2 deletions)
@@ -36,7 +36,7 @@ const cgroupKataPrefix = "kata"
 var cgroupsLoadFunc = cgroups.Load
 var cgroupsNewFunc = cgroups.New
 
-// V1Constraints returns the cgroups that are compatible with th VC architecture
+// V1Constraints returns the cgroups that are compatible with the VC architecture
 // and hypervisor, constraints can be applied to these cgroups.
 func V1Constraints() ([]cgroups.Subsystem, error) {
 	root, err := cgroupV1MountPoint()
@@ -51,7 +51,7 @@ func V1Constraints() ([]cgroups.Subsystem, error) {
 	return cgroupsSubsystems(subsystems)
 }
 
-// V1NoConstraints returns the cgroups that are *not* compatible with th VC
+// V1NoConstraints returns the cgroups that are *not* compatible with the VC
 // architecture and hypervisor, constraints MUST NOT be applied to these cgroups.
 func V1NoConstraints() ([]cgroups.Subsystem, error) {
 	root, err := cgroupV1MountPoint()
virtcontainers/cgroups_test.go (1 addition & 0 deletions)
@@ -130,6 +130,7 @@ func TestUpdateCgroups(t *testing.T) {
 		state: types.SandboxState{
 			CgroupPath: "",
 		},
+		config: &SandboxConfig{SandboxCgroupOnly: false},
 	}
 
 	// empty path
virtcontainers/sandbox.go (58 additions & 25 deletions)
@@ -1809,10 +1809,12 @@ func (s *Sandbox) AddDevice(info config.DeviceInfo) (api.Device, error) {
 	return b, nil
 }
 
+// updateResources will calculate the resources required for the virtual machine, and
+// adjust the virtual machine sizing accordingly. For a given sandbox, it will calculate the
+// number of vCPUs required based on the sum of container requests, plus default CPUs for the VM.
+// Similar is done for memory. If changes in memory or CPU are made, the VM will be updated and
+// the agent will online the applicable CPU and memory.
 func (s *Sandbox) updateResources() error {
-	// the hypervisor.MemorySize is the amount of memory reserved for
-	// the VM and contaniners without memory limit
-
 	if s == nil {
 		return errors.New("sandbox is nil")
 	}
@@ -1825,16 +1827,18 @@ func (s *Sandbox) updateResources() error {
 	// Add default vcpus for sandbox
 	sandboxVCPUs += s.hypervisor.hypervisorConfig().NumVCPUs
 
-	sandboxMemoryByte := int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
-	sandboxMemoryByte += s.calculateSandboxMemory()
+	sandboxMemoryByte := s.calculateSandboxMemory()
+	// Add default / rsvd memory for sandbox.
+	sandboxMemoryByte += int64(s.hypervisor.hypervisorConfig().MemorySize) << utils.MibToBytesShift
 
 	// Update VCPUs
 	s.Logger().WithField("cpus-sandbox", sandboxVCPUs).Debugf("Request to hypervisor to update vCPUs")
 	oldCPUs, newCPUs, err := s.hypervisor.resizeVCPUs(sandboxVCPUs)
 	if err != nil {
 		return err
 	}
-	// The CPUs were increased, ask agent to online them
+
+	// If the CPUs were increased, ask agent to online them
 	if oldCPUs < newCPUs {
 		vcpusAdded := newCPUs - oldCPUs
 		if err := s.agent.onlineCPUMem(vcpusAdded, true); err != nil {
@@ -1851,7 +1855,7 @@ func (s *Sandbox) updateResources() error {
 	}
 	s.Logger().Debugf("Sandbox memory size: %d MB", newMemory)
 	if s.state.GuestMemoryHotplugProbe && updatedMemoryDevice.addr != 0 {
-		//notify the guest kernel about memory hot-add event, before onlining them
+		// notify the guest kernel about memory hot-add event, before onlining them
 		s.Logger().Debugf("notify guest kernel memory hot-add event via probe interface, memory device located at 0x%x", updatedMemoryDevice.addr)
 		if err := s.agent.memHotplugByProbe(updatedMemoryDevice.addr, uint32(updatedMemoryDevice.sizeMB), s.state.GuestMemoryBlockSizeMB); err != nil {
 			return err
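To make the sizing arithmetic concrete, here is a small, self-contained sketch of the memory calculation in this hunk. The shift value of 20 (MiB to bytes) mirrors utils.MibToBytesShift; the container limits and the 2048 MiB default are invented for illustration:

    package main

    import "fmt"

    const mibToBytesShift = 20 // mirrors utils.MibToBytesShift

    func main() {
        // Hypothetical sandbox: two containers with 512 MiB and 1024 MiB
        // memory limits, plus a default VM memory of 2048 MiB.
        containerLimitsByte := []int64{512 << mibToBytesShift, 1024 << mibToBytesShift}

        var sandboxMemoryByte int64
        for _, l := range containerLimitsByte {
            sandboxMemoryByte += l // calculateSandboxMemory() sums these
        }
        // Add default / reserved memory for the sandbox.
        sandboxMemoryByte += int64(2048) << mibToBytesShift

        // Prints 3758096384 bytes (3584 MiB): the size requested from the hypervisor.
        fmt.Printf("%d bytes (%d MiB)\n", sandboxMemoryByte, sandboxMemoryByte>>mibToBytesShift)
    }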
@@ -1893,7 +1897,19 @@ func (s *Sandbox) GetHypervisorType() string {
 	return string(s.config.HypervisorType)
 }
 
+// cgroupsUpdate will:
+// 1) get the v1constraints cgroup associated with the stored cgroup path
+// 2) (re-)add hypervisor vCPU threads to the appropriate cgroup
+// 3) If we are managing sandbox cgroup, update the v1constraints cgroup size
 func (s *Sandbox) cgroupsUpdate() error {
+
+	// If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already
+	// in the Kata sandbox cgroup (inherited). No need to move threads/processes, and we should
+	// rely on parent's cgroup CPU/memory values
+	if s.config.SandboxCgroupOnly {
+		return nil
+	}
+
 	if s.state.CgroupPath == "" {
 		s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty")
 		return nil
@@ -1904,7 +1920,7 @@ func (s *Sandbox) cgroupsUpdate() error {
 		return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err)
 	}
 
-	if err := s.constrainHypervisorVCPUs(cgroup); err != nil {
+	if err := s.constrainHypervisor(cgroup); err != nil {
 		return err
 	}
 
@@ -1925,6 +1941,8 @@ func (s *Sandbox) cgroupsUpdate() error {
 	return nil
 }
 
+// cgroupsDelete will move the running processes in the sandbox cgroup
+// to the parent and then delete the sandbox cgroup
 func (s *Sandbox) cgroupsDelete() error {
 	s.Logger().Debug("Deleting sandbox cgroup")
 	if s.state.CgroupPath == "" {
@@ -1933,19 +1951,19 @@ func (s *Sandbox) cgroupsDelete() error {
 	}
 
 	var path string
-	cgroupSubystems := V1NoConstraints
+	var cgroupSubsystems cgroups.Hierarchy
 
 	if s.config.SandboxCgroupOnly {
-		// Override V1NoConstraints, if SandboxCgroupOnly is enabled
-		cgroupSubystems = cgroups.V1
+		cgroupSubsystems = cgroups.V1
 		path = s.state.CgroupPath
 		s.Logger().WithField("path", path).Debug("Deleting sandbox cgroups (all subsystems)")
 	} else {
+		cgroupSubsystems = V1NoConstraints
 		path = cgroupNoConstraintsPath(s.state.CgroupPath)
 		s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup")
 	}
 
-	sandboxCgroups, err := cgroupsLoadFunc(cgroupSubystems, cgroups.StaticPath(path))
+	sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path))
 	if err == cgroups.ErrCgroupDeleted {
 		// cgroup already deleted
 		s.Logger().Warnf("cgroup already deleted: '%s'", err)
@@ -1957,7 +1975,7 @@ func (s *Sandbox) cgroupsDelete() error {
 	}
 
 	// move running process here, that way cgroup can be removed
-	parent, err := parentCgroup(cgroupSubystems, path)
+	parent, err := parentCgroup(cgroupSubsystems, path)
 	if err != nil {
 		// parent cgroup doesn't exist, that means there are no process running
 		// and the no constraints cgroup was removed.
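The teardown pattern here is standard for cgroup v1: a cgroup cannot be removed while it still contains processes, so members are reparented first. A minimal sketch using the containerd/cgroups API that this code wraps (the paths are hypothetical stand-ins for s.state.CgroupPath and its parent):

    import "github.com/containerd/cgroups"

    // deleteSandboxCgroup empties a v1 cgroup into its parent, then removes it.
    func deleteSandboxCgroup() error {
        cg, err := cgroups.Load(cgroups.V1, cgroups.StaticPath("/kata/sandbox-abc"))
        if err == cgroups.ErrCgroupDeleted {
            return nil // already gone, nothing to do
        }
        if err != nil {
            return err
        }
        parent, err := cgroups.Load(cgroups.V1, cgroups.StaticPath("/kata"))
        if err != nil {
            return err
        }
        // MoveTo reparents every member process so Delete can succeed.
        if err := cg.MoveTo(parent); err != nil {
            return err
        }
        return cg.Delete()
    }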
@@ -1973,35 +1991,50 @@ func (s *Sandbox) cgroupsDelete() error {
 	return sandboxCgroups.Delete()
 }
 
-func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
+// constrainHypervisor will place the VMM and vCPU threads into cgroups.
+func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error {
+	// VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set.
+	// This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take
+	// Kata/VMM into account, Kata may fail to boot due to being overconstrained.
+	// If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained
+	// cgroup
+	if s.config.SandboxCgroupOnly {
+		// Kata components were moved into the sandbox-cgroup already, so VMM
+		// will already land there as well. No need to take action
+		return nil
+	}
+
 	pids := s.hypervisor.getPids()
 	if len(pids) == 0 || pids[0] == 0 {
 		return fmt.Errorf("Invalid hypervisor PID: %+v", pids)
 	}
 
-	// Move hypervisor into cgroups without constraints,
-	// those cgroups are not yet supported.
+	// Move the VMM into cgroups without constraints, those cgroups are not yet supported.
 	resources := &specs.LinuxResources{}
 	path := cgroupNoConstraintsPath(s.state.CgroupPath)
-	noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
+	vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources)
 	if err != nil {
 		return fmt.Errorf("Could not create cgroup %v: %v", path, err)
 	}
 
 	for _, pid := range pids {
 		if pid <= 0 {
 			s.Logger().Warnf("Invalid hypervisor pid: %d", pid)
 			continue
 		}
 
-		if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
-			return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err)
+		if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil {
+			return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err)
 		}
 	}
 
 	// when new container joins, new CPU could be hotplugged, so we
-	// have to query fresh vcpu info from hypervisor for every time.
+	// have to query fresh vcpu info from hypervisor every time.
 	tids, err := s.hypervisor.getThreadIDs()
 	if err != nil {
 		return fmt.Errorf("failed to get thread ids from hypervisor: %v", err)
@@ -2013,9 +2046,9 @@ func (s *Sandbox) constrainHypervisorVCPUs(cgroup cgroups.Cgroup) error {
 		return nil
 	}
 
-	// We are about to move just the vcpus (threads) into cgroups with constraints.
-	// Move whole hypervisor process whould be easier but the IO/network performance
-	// whould be impacted.
+	// Move vcpus (threads) into cgroups with constraints.
+	// Move whole hypervisor process would be easier but the IO/network performance
+	// would be over-constrained.
 	for _, i := range tids.vcpus {
 		// In contrast, AddTask will write thread id to `tasks`
 		// After this, vcpu threads are in "vcpu" sub-cgroup, other threads in
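The comment above (truncated in this view) contrasts the two containerd/cgroups entry points: Add writes a process id to cgroup.procs, moving the entire process, while AddTask writes a single thread id to tasks. A sketch of the idea, reusing the cgroup and tids variables from the surrounding function (the CPU quota and period values are invented):

    quota := int64(100000)
    period := uint64(100000)
    // Create (or reuse) a "vcpu" sub-cgroup holding the CPU constraints.
    vcpuCgroup, err := cgroup.New("vcpu", &specs.LinuxResources{
        CPU: &specs.LinuxCPU{Quota: &quota, Period: &period},
    })
    if err != nil {
        return err
    }
    for _, tid := range tids.vcpus {
        // AddTask writes the thread id to `tasks`, so only this vCPU thread is
        // constrained; Add would write the pid to `cgroup.procs` and drag the
        // whole hypervisor process (I/O and network threads included) along.
        if err := vcpuCgroup.AddTask(cgroups.Process{Pid: tid}); err != nil {
            return err
        }
    }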
