From ffa4262a0375a65cba754a03571ec942f3592f80 Mon Sep 17 00:00:00 2001 From: zhou hongbin <131335757+zzzzhhb@users.noreply.github.com> Date: Thu, 14 Mar 2024 14:50:25 +0800 Subject: [PATCH] fix(sysadvisor): numa balancer miss eviction (#506) --- .../qosaware/resource/memory/advisor_test.go | 4 ++-- .../resource/memory/plugin/memory_balancer.go | 22 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go index 23597888c..69c109515 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go @@ -1156,7 +1156,7 @@ func TestUpdate(t *testing.T) { ExtraEntries: []types.ExtraMemoryAdvices{ { Values: map[string]string{ - string(memoryadvisor.ControlKnobKeyBalanceNumaMemory): "{\"destNumaList\":[1],\"sourceNuma\":0,\"migrateContainers\":[{\"podUID\":\"uid4\",\"containerName\":\"c4\",\"destNumaList\":[0,1,2,3]},{\"podUID\":\"uid2\",\"containerName\":\"c2\",\"destNumaList\":[0,1]},{\"podUID\":\"uid1\",\"containerName\":\"c1\",\"destNumaList\":[0,1]}],\"totalRSS\":4294967296,\"threshold\":0.7}", + string(memoryadvisor.ControlKnobKeyBalanceNumaMemory): "{\"destNumaList\":[1],\"sourceNuma\":0,\"migrateContainers\":[{\"podUID\":\"uid4\",\"containerName\":\"c4\",\"destNumaList\":[0,1,2,3]},{\"podUID\":\"uid3\",\"containerName\":\"c3\",\"destNumaList\":[0,1,2,3]}],\"totalRSS\":3221225472,\"threshold\":0.7}", }, }, }, @@ -1600,7 +1600,7 @@ func TestUpdate(t *testing.T) { ExtraEntries: []types.ExtraMemoryAdvices{ { Values: map[string]string{ - string(memoryadvisor.ControlKnobKeyBalanceNumaMemory): "{\"destNumaList\":[1],\"sourceNuma\":0,\"migrateContainers\":[{\"podUID\":\"uid4\",\"containerName\":\"c4\",\"destNumaList\":[0,1,2,3]},{\"podUID\":\"uid2\",\"containerName\":\"c2\",\"destNumaList\":[0,1]},{\"podUID\":\"uid1\",\"containerName\":\"c1\",\"destNumaList\":[0,1]}],\"totalRSS\":4294967296,\"threshold\":0.7}", + string(memoryadvisor.ControlKnobKeyBalanceNumaMemory): "{\"destNumaList\":[1],\"sourceNuma\":0,\"migrateContainers\":[{\"podUID\":\"uid4\",\"containerName\":\"c4\",\"destNumaList\":[0,1,2,3]},{\"podUID\":\"uid3\",\"containerName\":\"c3\",\"destNumaList\":[0,1,2,3]}],\"totalRSS\":3221225472,\"threshold\":0.7}", }, }, }, diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memory_balancer.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memory_balancer.go index d325e04f6..98254649d 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memory_balancer.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memory_balancer.go @@ -183,6 +183,8 @@ type BalanceInfo struct { Status BalanceStatus `json:"status"` FailedReason string `json:"failedReason"` DetectTime time.Time `json:"detectTime"` + BalanceExecuted bool `json:"balanceExecuted"` + EvictExecuted bool `json:"evictExecuted"` } type memoryBalancer struct { @@ -226,7 +228,7 @@ func (m *memoryBalancer) GetEvictPods(_ context.Context, request *pluginapi.GetE m.mutex.Lock() evictPods := make([]*pluginapi.EvictPod, 0) - if m.balanceInfo == nil { + if m.balanceInfo == nil || m.balanceInfo.EvictExecuted { return &pluginapi.GetEvictPodsResponse{}, nil } @@ -244,7 +246,7 @@ func (m *memoryBalancer) GetEvictPods(_ context.Context, request *pluginapi.GetE } } - m.balanceInfo.EvictPods = make([]EvictPod, 0) + m.balanceInfo.EvictExecuted = true return &pluginapi.GetEvictPodsResponse{EvictPods: evictPods}, nil } @@ -282,6 +284,8 @@ func (m *memoryBalancer) getBalanceInfo() (balanceInfo *BalanceInfo, err error) RawNumaLatencyInfo: make([]*NumaLatencyInfo, 0), Status: BalanceStatusPreparing, DetectTime: time.Now(), + EvictExecuted: false, + BalanceExecuted: false, } defer func() { @@ -357,7 +361,7 @@ func (m *memoryBalancer) getBalanceInfo() (balanceInfo *BalanceInfo, err error) for _, pod := range balanceInfo.EvictPods { evictPodsUIDSet.Insert(pod.UID) } - balanceInfo.BalancePods, balanceInfo.TotalRSS, err = m.getBalancePods(m.conf.SupportedPools, balanceInfo.SourceNuma, balanceInfo.DestNumas, evictPodsUIDSet) + balanceInfo.BalancePods, balanceInfo.TotalRSS, err = m.getBalancePods(m.conf.SupportedPools, balanceInfo.SourceNuma, balanceInfo.DestNumas) if err != nil { return } @@ -596,7 +600,7 @@ func (m *memoryBalancer) getBalancePodsForPool(poolName string, srcNuma *NumaLat return result, nil } -func (m *memoryBalancer) getBalancePods(supportPools []string, srcNuma *NumaLatencyInfo, destNumas []NumaInfo, skipPodsUIDSet sets.String) ([]BalancePod, float64, error) { +func (m *memoryBalancer) getBalancePods(supportPools []string, srcNuma *NumaLatencyInfo, destNumas []NumaInfo) ([]BalancePod, float64, error) { var totalRSS float64 = 0 poolPodSortList := make([]PodSort, 0) reclaimedPodSortList, err := m.getBalancePodsForPool(state.PoolNameReclaim, srcNuma, destNumas, m.conf.BalancedReclaimedPodSourceNumaRSSMin, m.conf.BalancedReclaimedPodSourceNumaRSSMax) @@ -629,9 +633,6 @@ func (m *memoryBalancer) getBalancePods(supportPools []string, srcNuma *NumaLate targetPods := make([]BalancePod, 0) for _, sortPod := range reclaimedPodSortList { - if skipPodsUIDSet.Has(string(sortPod.Pod.UID)) { - continue - } if totalRSS+sortPod.SortValue > float64(m.conf.BalancedReclaimedPodsSourceNumaTotalRSSMax) { break } @@ -646,9 +647,6 @@ func (m *memoryBalancer) getBalancePods(supportPools []string, srcNuma *NumaLate } for _, sortPod := range poolPodSortList { - if skipPodsUIDSet.Has(string(sortPod.Pod.UID)) { - continue - } if totalRSS+sortPod.SortValue > float64(m.conf.BalancedPodsSourceNumaTotalRSSMax) { break @@ -864,7 +862,7 @@ func (m *memoryBalancer) GetAdvices() (result types.InternalMemoryCalculationRes } if !m.balanceInfo.NeedBalance || m.balanceInfo.Status != BalanceStatusPrepareSuccess || - len(m.balanceInfo.BalancePods) == 0 || len(m.balanceInfo.DestNumas) == 0 { + len(m.balanceInfo.BalancePods) == 0 || len(m.balanceInfo.DestNumas) == 0 || m.balanceInfo.BalanceExecuted { return } @@ -901,7 +899,7 @@ func (m *memoryBalancer) GetAdvices() (result types.InternalMemoryCalculationRes m.lastBalanceInfo = m.balanceInfo } // avoid duplicate execution - m.balanceInfo = nil + m.balanceInfo.BalanceExecuted = true return }