From 46ed456fe0c67d99e1631c8d1f679ac603d1a238 Mon Sep 17 00:00:00 2001 From: lilianrong Date: Thu, 4 Jul 2024 21:09:54 +0800 Subject: [PATCH] fix(advisor): fix reclaim overlap share cores --- .../provisionassembler/assembler_common.go | 33 +++--- .../assembler_common_test.go | 2 +- .../assembler/provisionassembler/helper.go | 31 +++++ .../provisionassembler/helper_test.go | 109 ++++++++++++++++++ 4 files changed, 155 insertions(+), 20 deletions(-) create mode 100644 pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper_test.go diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go index 654fe17bb..874347111 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common.go @@ -19,7 +19,6 @@ package provisionassembler import ( "context" "fmt" - "math" "time" "k8s.io/klog/v2" @@ -144,7 +143,7 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul calculationResult.SetPoolEntry(poolName, regionNuma, size) } - reclaimedCoresSize := available - general.SumUpMapValues(poolSizes) + reservedForReclaim + var reclaimedCoresSize int if *pa.allowSharedCoresOverlapReclaimedCores { reclaimedCoresAvail := poolSizes[r.OwnerPoolName()] - poolRequirements[r.OwnerPoolName()] if !nodeEnableReclaim { @@ -153,6 +152,11 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul reclaimedCoresSize = general.Max(pa.getNumasReservedForReclaim(r.GetBindingNumas()), reclaimedCoresAvail) reclaimedCoresSize = general.Min(reclaimedCoresSize, poolSizes[r.OwnerPoolName()]) calculationResult.SetPoolOverlapInfo(state.PoolNameReclaim, regionNuma, r.OwnerPoolName(), reclaimedCoresSize) + } else { + reclaimedCoresSize = available - general.SumUpMapValues(poolSizes) + reservedForReclaim + if !nodeEnableReclaim { + reclaimedCoresSize = reservedForReclaim + } } calculationResult.SetPoolEntry(state.PoolNameReclaim, regionNuma, reclaimedCoresSize) @@ -236,7 +240,7 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul calculationResult.SetPoolEntry(poolName, state.FakedNUMAID, poolSize) } - reclaimPoolSizeOfNonBindingNUMAs := shareAndIsolatedPoolAvailable - general.SumUpMapValues(shareAndIsolatePoolSizes) + pa.getNumasReservedForReclaim(*pa.nonBindingNumas) + var reclaimPoolSizeOfNonBindingNUMAs int if *pa.allowSharedCoresOverlapReclaimedCores { isolated := shareAndIsolatedPoolAvailable sharePoolSizes := make(map[string]int) @@ -251,22 +255,8 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul if len(sharePoolSizes) > 0 { reclaimPoolSizeOfNonBindingNUMAs = general.Min(reclaimPoolSizeOfNonBindingNUMAs, general.SumUpMapValues(sharePoolSizes)) - sharedOverlapReclaimSize := make(map[string]int) // sharedPoolName -> reclaimedSize - sharePoolSum := general.SumUpMapValues(sharePoolSizes) - ps := general.SortedByValue(sharePoolSizes) - for _, p := range ps { - sharePoolSize := p.Value - sharePoolName := p.Key - size := int(math.Floor(float64(reclaimPoolSizeOfNonBindingNUMAs*sharePoolSize) / float64(sharePoolSum))) - if size > 0 { - sharedOverlapReclaimSize[sharePoolName] = size - } - } - - diff := reclaimPoolSizeOfNonBindingNUMAs - general.SumUpMapValues(sharedOverlapReclaimSize) - if diff > 0 { - sharedOverlapReclaimSize[ps[0].Key] += diff - } else if diff < 0 { + sharedOverlapReclaimSize, err := regulateOverlapReclaimPoolSize(sharePoolSizes, reclaimPoolSizeOfNonBindingNUMAs) + if err != nil { return types.InternalCPUCalculationResult{}, fmt.Errorf("failed to calculate sharedOverlapReclaimSize") } @@ -274,6 +264,11 @@ func (pa *ProvisionAssemblerCommon) AssembleProvision() (types.InternalCPUCalcul calculationResult.SetPoolOverlapInfo(state.PoolNameReclaim, state.FakedNUMAID, overlapPoolName, size) } } + } else { + reclaimPoolSizeOfNonBindingNUMAs = shareAndIsolatedPoolAvailable - general.SumUpMapValues(shareAndIsolatePoolSizes) + pa.getNumasReservedForReclaim(*pa.nonBindingNumas) + if !nodeEnableReclaim { + reclaimPoolSizeOfNonBindingNUMAs = pa.getNumasReservedForReclaim(*pa.nonBindingNumas) + } } calculationResult.SetPoolEntry(state.PoolNameReclaim, state.FakedNUMAID, reclaimPoolSizeOfNonBindingNUMAs) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go index 9ec03efc8..5e574417c 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/assembler_common_test.go @@ -918,7 +918,7 @@ func TestAssembleProvision(t *testing.T) { }, }, expectPoolOverlapInfo: map[string]map[int]map[string]int{ - "reclaim": {-1: map[string]int{"share-a": 15, "share-b": 19}}, + "reclaim": {-1: map[string]int{"share-a": 14, "share-b": 20}}, }, }, } diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go index 13d1b4693..f11f236c3 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper.go @@ -32,6 +32,37 @@ func getNUMAsResource(resources map[int]int, numas machine.CPUSet) int { return res } +func regulateOverlapReclaimPoolSize(sharePoolSizes map[string]int, overlapReclaimPoolSizeRequired int) (map[string]int, error) { + sharePoolSum := general.SumUpMapValues(sharePoolSizes) + if overlapReclaimPoolSizeRequired > sharePoolSum { + return nil, fmt.Errorf("invalid sharedOverlapReclaimSize") + } + + overlapReclaimPoolSizeRequiredLeft := overlapReclaimPoolSizeRequired + sharedOverlapReclaimSize := make(map[string]int) // sharedPoolName -> reclaimedSize + ps := general.SortedByValue(sharePoolSizes) + for i := 0; i < len(ps); i++ { + index := len(ps) - 1 - i + sharePoolSize := ps[index].Value + sharePoolName := ps[index].Key + + size := int(math.Ceil(float64(overlapReclaimPoolSizeRequired*sharePoolSize) / float64(sharePoolSum))) + if size > sharePoolSize { + size = sharePoolSize + } + if size > overlapReclaimPoolSizeRequiredLeft { + size = overlapReclaimPoolSizeRequiredLeft + } + sharedOverlapReclaimSize[sharePoolName] = size + overlapReclaimPoolSizeRequiredLeft -= size + if overlapReclaimPoolSizeRequiredLeft == 0 { + break + } + } + + return sharedOverlapReclaimSize, nil +} + // regulatePoolSizes modifies pool size map to legal values, taking total available // resource and config such as enable reclaim into account. should be compatible with // any case and not return error. return true if reach resource upper bound. diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper_test.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper_test.go new file mode 100644 index 000000000..a211c4424 --- /dev/null +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/assembler/provisionassembler/helper_test.go @@ -0,0 +1,109 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package provisionassembler + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +type regulateOverlapReclaimPoolSizeCase struct { + name string + sharePoolSizes map[string]int + overlapReclaimPoolSizeRequired int + expectError error + expectOverlapReclaimPoolSize map[string]int +} + +func Test_regulateOverlapReclaimPoolSize(t *testing.T) { + t.Parallel() + + testCases := []regulateOverlapReclaimPoolSizeCase{ + { + name: "case1", + sharePoolSizes: map[string]int{ + "share": 9, + "bmq": 8, + "flink": 7, + }, + overlapReclaimPoolSizeRequired: 30, + expectError: fmt.Errorf("invalid sharedOverlapReclaimSize"), + expectOverlapReclaimPoolSize: nil, + }, + { + name: "case2", + sharePoolSizes: map[string]int{ + "share": 9, + "bmq": 8, + "flink": 7, + }, + overlapReclaimPoolSizeRequired: 2, + expectError: nil, + expectOverlapReclaimPoolSize: map[string]int{ + "share": 1, + "bmq": 1, + }, + }, + { + name: "case3", + sharePoolSizes: map[string]int{ + "share": 9, + "bmq": 8, + "flink": 7, + }, + overlapReclaimPoolSizeRequired: 4, + expectError: nil, + expectOverlapReclaimPoolSize: map[string]int{ + "share": 2, + "bmq": 2, + }, + }, + { + name: "case4", + sharePoolSizes: map[string]int{ + "share": 9, + "bmq": 8, + "flink": 7, + }, + overlapReclaimPoolSizeRequired: 8, + expectError: nil, + expectOverlapReclaimPoolSize: map[string]int{ + "share": 3, + "bmq": 3, + "flink": 2, + }, + }, + } + + for _, testCase := range testCases { + func(testCase regulateOverlapReclaimPoolSizeCase) { + t.Run(testCase.name, func(t *testing.T) { + t.Parallel() + + result, err := regulateOverlapReclaimPoolSize(testCase.sharePoolSizes, testCase.overlapReclaimPoolSizeRequired) + if testCase.expectError == nil { + require.NoError(t, err, "failed to regulate overlap reclaim pool size") + } else { + require.EqualError(t, err, testCase.expectError.Error(), "invalid error return") + } + require.Equal(t, testCase.expectOverlapReclaimPoolSize, result, "invalid result") + }) + }(testCase) + } +}