Skip to content

Commit

Permalink
fix(sysadvisor): fix failed to find regions
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <linzhecheng@bytedance.com>
  • Loading branch information
cheney-lin committed May 11, 2023
1 parent be47c25 commit 0105f93
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 16 deletions.
34 changes: 29 additions & 5 deletions pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -494,15 +494,39 @@ func (cra *cpuResourceAdvisor) gc() {
}
}

func (cra *cpuResourceAdvisor) getContainerRegions(ci *types.ContainerInfo) ([]region.QoSRegion, error) {
func (cra *cpuResourceAdvisor) getRegionsByRegionNames(names sets.String) []region.QoSRegion {
var regions []region.QoSRegion = nil
for regionName := range ci.RegionNames {
r, ok := cra.regionMap[regionName]
for regionName := range names {
region, ok := cra.regionMap[regionName]
if !ok {
return nil, fmt.Errorf("failed to find region %v", regionName)
return nil
}
regions = append(regions, r)
regions = append(regions, region)
}
return regions
}

// getRegionsByPodUID returns every region in cra.regionMap whose pod set
// contains podUID. The result is nil when no region holds the pod. Because
// regionMap is a map, the order of the returned regions is unspecified.
func (cra *cpuResourceAdvisor) getRegionsByPodUID(podUID string) []region.QoSRegion {
	var regions []region.QoSRegion
	for _, r := range cra.regionMap {
		// The pod set is keyed by pod UID, so a direct lookup replaces
		// scanning every key for a match.
		if _, ok := r.GetPods()[podUID]; ok {
			regions = append(regions, r)
		}
	}
	return regions
}

// getContainerRegions resolves the regions that the container ci belongs to.
// It first looks the regions up by ci.RegionNames; when that yields nothing it
// falls back to the regions that hold the container's pod (ci.PodUID), since
// the containers of the same pod belong to the same region.
// The returned error is always nil.
func (cra *cpuResourceAdvisor) getContainerRegions(ci *types.ContainerInfo) ([]region.QoSRegion, error) {
	if byName := cra.getRegionsByRegionNames(ci.RegionNames); len(byName) > 0 {
		return byName, nil
	}

	// Name lookup found nothing; fall back to matching by pod UID.
	return cra.getRegionsByPodUID(ci.PodUID), nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,14 @@ func (p *PolicyCanonical) Update() error {
if err != nil {
return err
}
klog.Infof("[qosaware-cpu-provision] cpu requirement estimation: %.2f, #container %v", cpuEstimation, containerCnt)

// we need to call SetLatestCPURequirement to ensure the previous requirements are passed to
// regulator in case that sysadvisor restarts, to avoid the slow-start always begin with zero.
p.regulator.SetLatestCPURequirement(p.requirement)
p.regulator.Regulate(cpuEstimation)
p.requirement = p.regulator.GetCPURequirement()

klog.Infof("[qosaware-cpu-provision] cpu requirement estimation: %.2f, requirement: %v #container %v", cpuEstimation, p.requirement, containerCnt)
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ func (c *CPURegulator) SetLatestCPURequirement(latestCPURequirement int) {
func (c *CPURegulator) Regulate(cpuRequirementRaw float64) {
cpuRequirement := cpuRequirementRaw + float64(c.ReservedForAllocate)
cpuRequirement = c.slowdown(cpuRequirement)
cpuRequirementInt := c.round(cpuRequirement)
cpuRequirementInt = c.clamp(cpuRequirementInt)
cpuRequirementRound := c.round(cpuRequirement)
cpuRequirementInt := c.clamp(cpuRequirementRound)

klog.Infof("[qosaware-cpu] cpu requirement by policy: %.2f, after post process: %v, added reserved: %v",
cpuRequirementRaw, cpuRequirementInt, c.ReservedForAllocate)
klog.Infof("[qosaware-cpu] cpu requirement by policy: %.2f, after slowdown %.2f, after round %v, after post process: %v, added reserved: %v",
cpuRequirementRaw, cpuRequirement, cpuRequirementRound, cpuRequirementInt, c.ReservedForAllocate)

if cpuRequirementInt != c.latestCPURequirement {
c.latestCPURequirement = cpuRequirementInt
Expand All @@ -96,6 +96,7 @@ func (c *CPURegulator) slowdown(cpuRequirement float64) float64 {
// Restrict ramp down period
if cpuRequirement < latestCPURequirement && now.Before(c.latestRampDownTime.Add(c.minRampDownPeriod)) {
cpuRequirement = latestCPURequirement
return cpuRequirement
}

// Restrict ramp up and down step
Expand Down
13 changes: 7 additions & 6 deletions pkg/agent/sysadvisor/plugin/qosaware/server/cpu/cpu_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,13 +385,14 @@ func (cs *cpuServer) removePod(podUID string) error {
}

func (cs *cpuServer) updatePoolInfo(poolName string, info *cpuadvisor.AllocationInfo) error {
// No need to check existence of pool because cpu plugin may return non-exist
// pool such as fallback. GC is needed for pool maintenance.
pi := &types.PoolInfo{
PoolName: info.OwnerPoolName,
TopologyAwareAssignments: machine.TransformCPUAssignmentFormat(info.TopologyAwareAssignments),
OriginalTopologyAwareAssignments: machine.TransformCPUAssignmentFormat(info.OriginalTopologyAwareAssignments),
pi, ok := cs.metaCache.GetPoolInfo(poolName)
if !ok {
pi = &types.PoolInfo{
PoolName: info.OwnerPoolName,
}
}
pi.TopologyAwareAssignments = machine.TransformCPUAssignmentFormat(info.TopologyAwareAssignments)
pi.OriginalTopologyAwareAssignments = machine.TransformCPUAssignmentFormat(info.OriginalTopologyAwareAssignments)

return cs.metaCache.SetPoolInfo(poolName, pi)
}
Expand Down

0 comments on commit 0105f93

Please sign in to comment.