From 931278a7d54ba591996cda40dca7639cf8097182 Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Tue, 12 Sep 2023 18:51:12 +0800 Subject: [PATCH] refactor(sysadvisor): bind reclaim_cores containers to non-exclusive numas Signed-off-by: linzhecheng --- .../qosaware/resource/memory/advisor_test.go | 19 ++++++-- .../resource/memory/plugin/memset_binder.go | 47 ++++++++++++++++++- 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go index 58f5f252ab..7a9cf5f3b6 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/advisor_test.go @@ -119,13 +119,16 @@ func newTestMemoryAdvisor(t *testing.T, pods []*v1.Pod, checkpointDir, stateFile cpuTopology, err := machine.GenerateDummyCPUTopology(96, 2, 4) require.NoError(t, err) + memoryTopology, err := machine.GenerateDummyMemoryTopology(4, 500<<30) + require.NoError(t, err) metaServer.MetaAgent = &agent.MetaAgent{ KatalystMachineInfo: &machine.KatalystMachineInfo{ MachineInfo: &info.MachineInfo{ MemoryCapacity: 1000 << 30, }, - CPUTopology: cpuTopology, + CPUTopology: cpuTopology, + MemoryTopology: memoryTopology, }, PodFetcher: &pod.PodFetcherStub{ PodList: pods, @@ -674,27 +677,33 @@ func TestUpdate(t *testing.T) { map[int]machine.CPUSet{ 0: machine.MustParse("1"), }, 200<<30), + makeContainerInfo("uid4", "default", "pod4", "c4", consts.PodAnnotationQoSLevelDedicatedCores, map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable}, + map[int]machine.CPUSet{ + 0: machine.MustParse("1"), + }, 200<<30), }, plugins: []types.MemoryAdvisorPluginName{memadvisorplugin.MemsetBinder}, nodeMetrics: defaultNodeMetrics, numaMetrics: 
defaultNumaMetrics, - wantHeadroom: *resource.NewQuantity(996<<30, resource.DecimalSI), + wantHeadroom: *resource.NewQuantity(871<<30, resource.DecimalSI), wantAdviceResult: types.InternalMemoryCalculationResult{ ContainerEntries: []types.ContainerMemoryAdvices{ { PodUID: "uid1", ContainerName: "c1", - Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "0-1"}, + Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "1-3"}, }, { PodUID: "uid2", ContainerName: "c2", - Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "0-1"}, + Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "1-3"}, }, { PodUID: "uid3", ContainerName: "c3", - Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "0"}, + Values: map[string]string{string(memoryadvisor.ControlKnobKeyCPUSetMems): "1-3"}, }, }, }, diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memset_binder.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memset_binder.go index d70da40ceb..a901a77ab6 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memset_binder.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/memory/plugin/memset_binder.go @@ -17,11 +17,16 @@ limitations under the License. 
package plugin import ( + "context" + "fmt" "sync" + "k8s.io/apimachinery/pkg/util/errors" + apiconsts "github.com/kubewharf/katalyst-api/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor" "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache" + "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/resource/helper" "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/consts" @@ -38,14 +43,18 @@ const ( type memsetBinder struct { mutex sync.RWMutex + conf *config.Configuration metaReader metacache.MetaReader + metaServer *metaserver.MetaServer emitter metrics.MetricEmitter containerMemset map[consts.PodContainerName]machine.CPUSet } func NewMemsetBinder(conf *config.Configuration, extraConfig interface{}, metaReader metacache.MetaReader, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter) MemoryAdvisorPlugin { return &memsetBinder{ + conf: conf, metaReader: metaReader, + metaServer: metaServer, emitter: emitter, } } @@ -55,18 +64,52 @@ func (mb *memsetBinder) reclaimedContainersFilter(ci *types.ContainerInfo) bool } func (mb *memsetBinder) Reconcile(status *types.MemoryPressureStatus) error { + var ( + errList []error + ) + + allNUMAs := mb.metaServer.CPUDetails.NUMANodes() + + availNUMAs := allNUMAs + containerMemset := make(map[consts.PodContainerName]machine.CPUSet) containers := make([]*types.ContainerInfo, 0) mb.metaReader.RangeContainer(func(podUID string, containerName string, containerInfo *types.ContainerInfo) bool { if mb.reclaimedContainersFilter(containerInfo) { containers = append(containers, containerInfo) + return true + } + + nodeReclaim := mb.conf.GetDynamicConfiguration().EnableReclaim + reclaimEnable, err := helper.PodEnableReclaim(context.Background(), mb.metaServer, podUID, nodeReclaim) + if err != nil { + errList = append(errList, err) + return 
true + } + + if containerInfo.IsNumaExclusive() && !reclaimEnable { + memset := machine.GetCPUAssignmentNUMAs(containerInfo.TopologyAwareAssignments) + if memset.IsEmpty() { + errList = append(errList, fmt.Errorf("contianer(%v/%v) TopologyAwareAssignments is empty", containerInfo.PodName, containerName)) + return true + } + availNUMAs = availNUMAs.Difference(memset) } return true }) + err := errors.NewAggregate(errList) + if err != nil { + return err + } + + if availNUMAs.IsEmpty() { + availNUMAs = allNUMAs + general.InfoS("availNUMAs is empty, have to bind all NUMAs to reclaimed_cores containers") + } + for _, ci := range containers { - memset := machine.GetCPUAssignmentNUMAs(ci.TopologyAwareAssignments) - containerMemset[native.GeneratePodContainerName(ci.PodUID, ci.ContainerName)] = memset + containerMemset[native.GeneratePodContainerName(ci.PodUID, ci.ContainerName)] = availNUMAs } mb.mutex.Lock() defer mb.mutex.Unlock()