From 83bce0c4fa3792bb55a3e3a81c910924a7844f73 Mon Sep 17 00:00:00 2001 From: zhou hongbin <131335757+zzzzhhb@users.noreply.github.com> Date: Tue, 1 Aug 2023 10:33:37 +0800 Subject: [PATCH] enhancement(eviction): load eviction support dynamic threshold (#129) * enhancement(eviction): evict by share pool status * enhancement(eviction): load eviction plugin use readonly state * chore(eviction): make test parallel * chore(eviction): rename some function and add some metric * enhancement(eviction): extract GetPodPoolMapFunc interface * enhancement(eviction): ignore isolation pool * refine implementation for load pressure --------- Co-authored-by: shaowei.wayne --- .../reclaimedresource_base.go | 16 +- .../eviction_resp_collector.go | 13 +- pkg/agent/evictionmanager/manager.go | 3 +- .../dynamicpolicy/cpueviction/cpu_eviciton.go | 11 +- .../cpueviction/strategy/pod_pool_getter.go | 85 ++ .../cpueviction/strategy/pressure_load.go | 205 +++-- .../strategy/pressure_load_metric.go | 5 +- .../strategy/pressure_load_test.go | 831 +++++++++++++++++- .../strategy/pressure_suppression.go | 4 +- .../qrm-plugins/cpu/dynamicpolicy/policy.go | 7 +- .../cpu/dynamicpolicy/state/state.go | 25 + .../cpu/dynamicpolicy/state/util.go | 5 + pkg/agent/qrm-plugins/util/util.go | 14 + .../plugin/qosaware/resource/cpu/advisor.go | 4 +- .../qosaware/resource/cpu/advisor_helper.go | 26 - .../reclaimedresource_base.go | 15 +- pkg/util/machine/cpu.go | 26 + 17 files changed, 1180 insertions(+), 115 deletions(-) create mode 100644 pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pod_pool_getter.go diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go index efffa61be..b28c237e8 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go @@ -29,10 +29,11 @@ import ( ) type ReclaimedResourceOptions struct { - EnableReclaim bool - ReservedResourceForReport general.ResourceList - MinReclaimedResourceForReport general.ResourceList - ReservedResourceForAllocate general.ResourceList + EnableReclaim bool + ReservedResourceForReport general.ResourceList + MinReclaimedResourceForReport general.ResourceList + ReservedResourceForAllocate general.ResourceList + ReservedResourceForReclaimedCores general.ResourceList *cpuheadroom.CPUHeadroomOptions *memoryheadroom.MemoryHeadroomOptions @@ -53,6 +54,10 @@ func NewReclaimedResourceOptions() *ReclaimedResourceOptions { v1.ResourceCPU: resource.MustParse("4"), v1.ResourceMemory: resource.MustParse("5Gi"), }, + ReservedResourceForReclaimedCores: map[v1.ResourceName]resource.Quantity{ + v1.ResourceCPU: resource.MustParse("4"), + v1.ResourceMemory: resource.MustParse("0"), + }, CPUHeadroomOptions: cpuheadroom.NewCPUHeadroomOptions(), MemoryHeadroomOptions: memoryheadroom.NewMemoryHeadroomOptions(), } @@ -70,6 +75,8 @@ func (o *ReclaimedResourceOptions) AddFlags(fss *cliflag.NamedFlagSets) { "min reclaimed resource report to cnr") fs.Var(&o.ReservedResourceForAllocate, "reserved-resource-for-allocate", "reserved reclaimed resource actually not allocate to reclaimed resource") + fs.Var(&o.ReservedResourceForReclaimedCores, "reserved-resource-for-reclaimed-cores", + "reserved resources for reclaimed_cores pods") o.CPUHeadroomOptions.AddFlags(fss) o.MemoryHeadroomOptions.AddFlags(fss) @@ -82,6 +89,7 @@ func 
(o *ReclaimedResourceOptions) ApplyTo(c *reclaimedresource.ReclaimedResourc c.ReservedResourceForReport = v1.ResourceList(o.ReservedResourceForReport) c.MinReclaimedResourceForReport = v1.ResourceList(o.MinReclaimedResourceForReport) c.ReservedResourceForAllocate = v1.ResourceList(o.ReservedResourceForAllocate) + c.MinReclaimedResourceForAllocate = v1.ResourceList(o.ReservedResourceForReclaimedCores) errList = append(errList, o.CPUHeadroomOptions.ApplyTo(c.CPUHeadroomConfiguration)) errList = append(errList, o.MemoryHeadroomOptions.ApplyTo(c.MemoryHeadroomConfiguration)) diff --git a/pkg/agent/evictionmanager/eviction_resp_collector.go b/pkg/agent/evictionmanager/eviction_resp_collector.go index 85e77898f..aa5420230 100644 --- a/pkg/agent/evictionmanager/eviction_resp_collector.go +++ b/pkg/agent/evictionmanager/eviction_resp_collector.go @@ -18,6 +18,7 @@ package evictionmanager import ( "fmt" + "strings" //nolint "github.com/golang/protobuf/proto" @@ -30,6 +31,8 @@ import ( "github.com/kubewharf/katalyst-core/pkg/util/general" ) +const effectTagValueSeparator = "_" + // evictionRespCollector is used to collect eviction result from plugins, it also handles some logic such as dry run. type evictionRespCollector struct { conf *pkgconfig.Configuration @@ -91,7 +94,7 @@ func (e *evictionRespCollector) collectEvictPods(dryRunPlugins []string, pluginN e.getLogPrefix(dryRun), pluginName, evictPod.Pod.Namespace, evictPod.Pod.Name, evictPod.Reason, evictPod.ForceEvict) if dryRun { - _ = e.emitter.StoreInt64(MetricsNameDryrunVictimPodCNT, 1, metrics.MetricTypeNameRaw, + _ = e.emitter.StoreInt64(MetricsNameDryRunVictimPodCNT, 1, metrics.MetricTypeNameRaw, metrics.MetricTag{Key: "name", Val: pluginName}, metrics.MetricTag{Key: "victim_ns", Val: evictPod.Pod.Namespace}, metrics.MetricTag{Key: "victim_name", Val: evictPod.Pod.Name}) @@ -143,6 +146,12 @@ func (e *evictionRespCollector) collectMetThreshold(dryRunPlugins []string, plug if resp.Condition != nil && resp.Condition.MetCondition { general.Infof("%v plugin: %s requests to set condition: %s of type: %s", e.getLogPrefix(dryRun), pluginName, resp.Condition.ConditionName, resp.Condition.ConditionType.String()) + _ = e.emitter.StoreInt64(MetricsNameDryRunConditionCNT, 1, metrics.MetricTypeNameRaw, + metrics.MetricTag{Key: "name", Val: pluginName}, + metrics.MetricTag{Key: "condition_name", Val: resp.Condition.ConditionName}, + metrics.MetricTag{Key: "condition_type", Val: fmt.Sprint(resp.Condition.ConditionType)}, + metrics.MetricTag{Key: "effects", Val: strings.Join(resp.Condition.Effects, effectTagValueSeparator)}, + ) if !dryRun { e.getCurrentConditions()[resp.Condition.ConditionName] = proto.Clone(resp.Condition).(*pluginapi.Condition) @@ -162,7 +171,7 @@ func (e *evictionRespCollector) collectTopEvictionPods(dryRunPlugins []string, p general.Infof("%v plugin %v request to evict topN pod %v/%v, reason: met threshold in scope [%v]", e.getLogPrefix(dryRun), pluginName, pod.Namespace, pod.Name, threshold.EvictionScope) if dryRun { - _ = e.emitter.StoreInt64(MetricsNameDryrunVictimPodCNT, 1, metrics.MetricTypeNameRaw, + _ = e.emitter.StoreInt64(MetricsNameDryRunVictimPodCNT, 1, metrics.MetricTypeNameRaw, metrics.MetricTag{Key: "name", Val: pluginName}, metrics.MetricTag{Key: "victim_ns", Val: pod.Namespace}, metrics.MetricTag{Key: "victim_name", Val: pod.Name}) diff --git a/pkg/agent/evictionmanager/manager.go b/pkg/agent/evictionmanager/manager.go index aec7a0a67..bdad9cc31 100644 --- a/pkg/agent/evictionmanager/manager.go +++ 
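
The new ReservedResourceForReclaimedCores option above is copied into the dynamic MinReclaimedResourceForAllocate configuration by ApplyTo. A minimal sketch of setting it programmatically, assuming the option package path follows the file path in this patch; the CPU quantity "8" is an illustrative value, not a recommended default:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/dynamic/adminqos/reclaimedresource"
)

func main() {
	// Start from the defaults introduced in this patch (cpu: 4, memory: 0)
	// and override the CPU reservation; "8" is an illustrative value only.
	opts := reclaimedresource.NewReclaimedResourceOptions()
	opts.ReservedResourceForReclaimedCores[v1.ResourceCPU] = resource.MustParse("8")
	fmt.Println(opts.ReservedResourceForReclaimedCores)
}
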
b/pkg/agent/evictionmanager/manager.go @@ -52,7 +52,8 @@ const ( MetricsNameVictimPodCNT = "victims_cnt" MetricsNameRunningPodCNT = "running_pod_cnt" MetricsNameCandidatePodCNT = "candidate_pod_cnt" - MetricsNameDryrunVictimPodCNT = "dryrun_victims_cnt" + MetricsNameDryRunVictimPodCNT = "dryrun_victims_cnt" + MetricsNameDryRunConditionCNT = "dryrun_condition_cnt" ) // LatestCNRGetter returns the latest CNR resources. diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton.go index d45723639..35f995288 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/cpu_eviciton.go @@ -50,7 +50,7 @@ type cpuPressureEviction struct { } func NewCPUPressureEviction(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, - conf *config.Configuration, state state.State) (agent.Component, error) { + conf *config.Configuration, state state.ReadonlyState) (agent.Component, error) { plugin, err := newCPUPressureEviction(emitter, metaServer, conf, state) if err != nil { return nil, fmt.Errorf("create cpu eviction plugin failed: %s", err) @@ -60,15 +60,20 @@ func NewCPUPressureEviction(emitter metrics.MetricEmitter, metaServer *metaserve } func newCPUPressureEviction(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, - conf *config.Configuration, state state.State) (skeleton.GenericPlugin, error) { + conf *config.Configuration, state state.ReadonlyState) (skeleton.GenericPlugin, error) { wrappedEmitter := emitter.WithTags(cpuPressureEvictionPluginName) + pressureLoadEviction, err := strategy.NewCPUPressureLoadEviction(emitter, metaServer, conf, state) + if err != nil { + return nil, fmt.Errorf("create CPUPressureLoadEviction plugin failed, err:%v", err) + } + plugin := &cpuPressureEviction{ forceEvictionList: []strategy.CPUPressureForceEviction{ strategy.NewCPUPressureSuppressionEviction(emitter, metaServer, conf, state), }, thresholdEvictionList: []strategy.CPUPressureThresholdEviction{ - strategy.NewCPUPressureLoadEviction(emitter, metaServer, conf, state), + pressureLoadEviction, }, } diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pod_pool_getter.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pod_pool_getter.go new file mode 100644 index 000000000..6749a80a1 --- /dev/null +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pod_pool_getter.go @@ -0,0 +1,85 @@ +/* +Copyright 2022 The Katalyst Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package strategy + +import ( + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/pod" + "github.com/kubewharf/katalyst-core/pkg/util/general" +) + +var getPodPoolMapFunc = DefaultGetPodPoolMapFunc + +// PodPoolMap is a map keyed by pod UID, the value is a map keyed by container name +// and its value is the container info with owner pool. +type PodPoolMap map[string]map[string]*ContainerOwnerPoolInfo + +func (p PodPoolMap) PutContainerOwnerPoolInfo(podUID string, containerName string, ownerPool string, poolSize int, isPool bool) { + containerOwnerPoolInfo := &ContainerOwnerPoolInfo{ + OwnerPool: ownerPool, + PoolSize: poolSize, + IsPool: isPool, + } + + if podMap, ok := p[podUID]; ok { + podMap[containerName] = containerOwnerPoolInfo + } else { + pm := map[string]*ContainerOwnerPoolInfo{containerName: containerOwnerPoolInfo} + p[podUID] = pm + } +} + +// GetPodPoolMapFunc returns a map keyed by pod UID, the value is a map keyed by container name +// and its value is the container info with owner pool. +type GetPodPoolMapFunc func(pod.PodFetcher, state.ReadonlyState) PodPoolMap + +type ContainerOwnerPoolInfo struct { + OwnerPool string + PoolSize int + IsPool bool +} + +// SetGetPodPoolMapFunc provides a hook to change the implementation of GetPodPoolMapFunc +func SetGetPodPoolMapFunc(f GetPodPoolMapFunc) { + general.Infof("SetGetPodPoolMapFunc called") + getPodPoolMapFunc = f +} + +var DefaultGetPodPoolMapFunc GetPodPoolMapFunc = func(fetcher pod.PodFetcher, readonlyState state.ReadonlyState) PodPoolMap { + result := make(PodPoolMap) + + for podUID, entry := range readonlyState.GetPodEntries() { + for containerName, containerEntry := range entry { + if entry.IsPoolEntry() { + result.PutContainerOwnerPoolInfo(podUID, containerName, podUID, containerEntry.AllocationResult.Size(), true) + continue + } + + if containerEntry == nil { + continue + } else if containerEntry.OwnerPoolName == "" { + general.Infof("skip get pool name for pod: %s, "+ + "container: %s with owner pool name: %s", podUID, containerName, containerEntry.OwnerPoolName) + continue + } + + result.PutContainerOwnerPoolInfo(podUID, containerName, containerEntry.OwnerPoolName, containerEntry.AllocationResult.Size(), false) + } + } + + return result +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go index 60b040c87..1c2695f06 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "sort" + "strconv" "sync" "time" @@ -30,6 +31,7 @@ import ( pluginapi "github.com/kubewharf/katalyst-api/pkg/protocol/evictionplugin/v1alpha1" advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + qrmutil "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util" "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" "github.com/kubewharf/katalyst-core/pkg/config/generic" @@ -37,6 +39,7 @@ import ( "github.com/kubewharf/katalyst-core/pkg/metaserver" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/general" + 
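
The PodPoolMap introduced in pod_pool_getter.go is keyed by pod UID and then container name; pool entries are stored under the pool name with an empty (faked) container name, mirroring the state layout. A minimal usage sketch with made-up pod and pool names, assuming only the exported API added by this file:

package main

import (
	"fmt"

	"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy"
)

func main() {
	m := make(strategy.PodPoolMap)
	// A regular container "main" of pod "pod-uid-1" owned by the "share" pool of size 8.
	m.PutContainerOwnerPoolInfo("pod-uid-1", "main", "share", 8, false)
	// The pool entry itself is stored under the pool name with an empty container name.
	m.PutContainerOwnerPoolInfo("share", "", "share", 8, true)

	if info := m["pod-uid-1"]["main"]; info != nil && !info.IsPool {
		fmt.Printf("container main of pod pod-uid-1 belongs to pool %s (size %d)\n", info.OwnerPool, info.PoolSize)
	}
}
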
"github.com/kubewharf/katalyst-core/pkg/util/machine" "github.com/kubewharf/katalyst-core/pkg/util/native" ) @@ -47,11 +50,14 @@ const ( metricsNamePoolMetricValue = "pool_metric_value_raw" metricsNamePoolMetricBound = "pool_metric_bound_raw" metricsNameThresholdMet = "threshold_met_count" + metricNameCollectPoolLoadCalled = "collect_pool_load_called" - metricsTagKeyMetricName = "metric_name" - metricsTagKeyPoolName = "pool_name" - metricsTagKeyBoundType = "bound_type" - metricsTagKeyThresholdMetType = "threshold_type" + metricsTagKeyMetricName = "metric_name" + metricsTagKeyPoolName = "pool_name" + metricsTagKeyBoundType = "bound_type" + metricsTagKeyThresholdMetType = "threshold_type" + metricsTagKeyAdvisedThreshold = "advised_threshold" + metricsTagKeyPressureByPoolSize = "pool_sized_pressure" metricsTagValueBoundTypeUpper = "upper" metricsTagValueBoundTypeLower = "lower" @@ -74,7 +80,7 @@ var ( type CPUPressureLoadEviction struct { sync.Mutex - state state.State + state state.ReadonlyState emitter metrics.MetricEmitter metaServer *metaserver.MetaServer qosConf *generic.QoSConfiguration @@ -87,10 +93,12 @@ type CPUPressureLoadEviction struct { lastEvictionTime time.Time poolMetricCollectHandlers map[string]PoolMetricCollectHandler + + systemReservedCPUs machine.CPUSet } func NewCPUPressureLoadEviction(emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer, - conf *config.Configuration, state state.State) CPUPressureThresholdEviction { + conf *config.Configuration, state state.ReadonlyState) (CPUPressureThresholdEviction, error) { plugin := &CPUPressureLoadEviction{ state: state, emitter: emitter, @@ -101,10 +109,18 @@ func NewCPUPressureLoadEviction(emitter metrics.MetricEmitter, metaServer *metas syncPeriod: conf.LoadEvictionSyncPeriod, } + systemReservedCores, reserveErr := qrmutil.GetCoresReservedForSystem(conf, metaServer.KatalystMachineInfo, metaServer.CPUDetails.CPUs().Clone()) + if reserveErr != nil { + general.Errorf("GetCoresReservedForSystem for reservedCPUsNum: %d failed with error: %v", + conf.ReservedCPUCores, reserveErr) + return plugin, reserveErr + } + plugin.systemReservedCPUs = systemReservedCores + plugin.poolMetricCollectHandlers = map[string]PoolMetricCollectHandler{ consts.MetricLoad1MinContainer: plugin.collectPoolLoad, } - return plugin + return plugin, nil } func (p *CPUPressureLoadEviction) Start(ctx context.Context) (err error) { @@ -127,21 +143,19 @@ func (p *CPUPressureLoadEviction) ThresholdMet(_ context.Context, }, nil } - general.Infof("with "+ - "loadUpperBoundRatio: %.2f, loadThresholdMetPercentage: %.2f, podGracePeriodSeconds: %d", - dynamicConfig.LoadUpperBoundRatio, dynamicConfig.LoadThresholdMetPercentage, - dynamicConfig.CPUPressureEvictionConfiguration.GracePeriod) + general.Infof("with loadUpperBoundRatio: %.2f, loadThresholdMetPercentage: %.2f, podGracePeriodSeconds: %d", + dynamicConfig.LoadUpperBoundRatio, dynamicConfig.LoadThresholdMetPercentage, dynamicConfig.CPUPressureEvictionConfiguration.GracePeriod) var isSoftOver bool var softOverRatio float64 var softThresholdMetPoolName string for poolName, entries := range p.metricsHistory[consts.MetricLoad1MinContainer] { - if !entries.IsPoolEntry() || skipPools.Has(poolName) { + if !entries.IsPoolEntry() || skipPools.Has(poolName) || state.IsIsolationPool(poolName) { continue } - metricRing := entries[""] + metricRing := entries[advisorapi.FakedContainerName] if metricRing == nil { general.Warningf("pool: %s hasn't metric: %s metricsRing", poolName, consts.MetricLoad1MinContainer) 
continue @@ -189,7 +203,7 @@ func (p *CPUPressureLoadEviction) ThresholdMet(_ context.Context, } p.clearEvictionPoolName() - if softThresholdMetPoolName != "" { + if softThresholdMetPoolName != advisorapi.EmptyOwnerPoolName { _ = p.emitter.StoreFloat64(metricsNameThresholdMet, 1, metrics.MetricTypeNameCount, metrics.ConvertMapToTags(map[string]string{ metricsTagKeyPoolName: softThresholdMetPoolName, @@ -239,7 +253,7 @@ func (p *CPUPressureLoadEviction) GetTopEvictionPods(_ context.Context, return &pluginapi.GetTopEvictionPodsResponse{}, nil } - entries := p.state.GetPodEntries() + podPoolMap := getPodPoolMapFunc(p.metaServer.MetaAgent, p.state) candidatePods := native.FilterPods(request.ActivePods, func(pod *v1.Pod) (bool, error) { if pod == nil { return false, fmt.Errorf("FilterPods got nil pod") @@ -248,9 +262,9 @@ func (p *CPUPressureLoadEviction) GetTopEvictionPods(_ context.Context, podUID := string(pod.GetUID()) for i := range pod.Spec.Containers { containerName := pod.Spec.Containers[i].Name - if entries[podUID][containerName] == nil { + if podPoolMap[podUID][containerName] == nil { return false, nil - } else if entries[podUID][containerName].OwnerPoolName == evictionPoolName { + } else if podPoolMap[podUID][containerName].OwnerPool == evictionPoolName { return true, nil } } @@ -303,37 +317,33 @@ func (p *CPUPressureLoadEviction) collectMetrics(_ context.Context) { defer p.Unlock() dynamicConfig := p.dynamicConf.GetDynamicConfiguration() + // always reset in-memory metric histories if load-eviction is disabled if !dynamicConfig.EnableLoadEviction { p.metricsHistory = make(map[string]Entries) return } - entries := p.state.GetPodEntries() - p.clearExpiredMetricsHistory(entries) + pod2Pool := getPodPoolMapFunc(p.metaServer.MetaAgent, p.state) + p.clearExpiredMetricsHistory(pod2Pool) // collect metric for pod/container pairs, and store in local (i.e. poolsMetric) collectTime := time.Now().UnixNano() poolsMetric := make(map[string]map[string]float64) - for podUID, entry := range entries { - if entry.IsPoolEntry() { - continue - } - + for podUID, entry := range pod2Pool { for containerName, containerEntry := range entry { - if containerEntry == nil { + if containerEntry == nil || containerEntry.IsPool { continue - } else if containerEntry.OwnerPoolName == "" || skipPools.Has(containerEntry.OwnerPoolName) { - general.Infof("skip collecting metric for pod: %s, "+ - "container: %s with owner pool name: %s", podUID, containerName, containerEntry.OwnerPoolName) + } else if containerEntry.OwnerPool == advisorapi.EmptyOwnerPoolName || skipPools.Has(containerEntry.OwnerPool) { + general.Infof("skip collecting metric for pod: %s, container: %s with owner pool name: %s", + podUID, containerName, containerEntry.OwnerPool) continue } - poolName := containerEntry.OwnerPoolName + poolName := containerEntry.OwnerPool for _, metricName := range handleMetrics.UnsortedList() { m, err := p.metaServer.GetContainerMetric(podUID, containerName, metricName) if err != nil { - general.Errorf("GetContainerMetric for pod: %s, "+ - "container: %s failed with error: %v", podUID, containerName, err) + general.Errorf("GetContainerMetric for pod: %s, container: %s failed with error: %v", podUID, containerName, err) continue } @@ -355,37 +365,80 @@ func (p *CPUPressureLoadEviction) collectMetrics(_ context.Context) { } // push pre-collected local store (i.e. 
poolsMetric) to metric ring buffer - for poolName, entry := range entries { - if entry == nil || !entry.IsPoolEntry() || skipPools.Has(poolName) { + underPressure := p.checkSharedPressureByPoolSize(pod2Pool) + for poolName, entry := range pod2Pool { + if entry == nil { continue } - poolEntry := entry[advisorapi.FakedContainerName] - if poolEntry == nil { + for _, poolEntry := range entry { + if poolEntry == nil || !poolEntry.IsPool || skipPools.Has(poolName) { + continue + } + + for _, metricName := range handleMetrics.UnsortedList() { + if _, found := poolsMetric[poolName][metricName]; !found { + continue + } + + handler := p.poolMetricCollectHandlers[metricName] + if handler == nil { + general.Warningf("metric: %s hasn't pool metric collecting handler, use default handler", metricName) + handler = p.collectPoolMetricDefault + } + handler(dynamicConfig, underPressure, metricName, poolsMetric[poolName][metricName], poolName, poolEntry.PoolSize, collectTime) + } + } + } +} + +// checkSharedPressureByPoolSize checks if the sum of all the shared pool size has reached the maximum +func (p *CPUPressureLoadEviction) checkSharedPressureByPoolSize(pod2Pool PodPoolMap) bool { + poolSizeSum := 0 + for poolName, entry := range pod2Pool { + if entry == nil { continue } - for _, metricName := range handleMetrics.UnsortedList() { - if _, found := poolsMetric[poolName][metricName]; !found { + for _, containerEntry := range entry { + if !containerEntry.IsPool || skipPools.Has(poolName) || entry[advisorapi.FakedContainerName] == nil { continue } - - handler := p.poolMetricCollectHandlers[metricName] - if handler == nil { - general.Warningf("metric: %s hasn't pool metric "+ - "collecting handler, use default handler", metricName) - handler = p.collectPoolMetricDefault - } - handler(dynamicConfig, metricName, poolsMetric[poolName][metricName], poolEntry, collectTime) + poolSizeSum += containerEntry.PoolSize } } + + sharedPoolsLimit := p.accumulateSharedPoolsLimit() + pressureByPoolSize := poolSizeSum >= sharedPoolsLimit + general.Infof("shared pools under pressure: %v, poolSizeSum: %v, limit: %v", pressureByPoolSize, poolSizeSum, sharedPoolsLimit) + + return pressureByPoolSize +} + +// accumulateSharedPoolsLimit calculates the cpu core limit used by shared core pool, +// and it equals: machine-core - cores-for-dedicated-pods - reserved-cores-reclaim-pods - reserved-cores-system-pods. +func (p *CPUPressureLoadEviction) accumulateSharedPoolsLimit() int { + availableCPUSet := p.state.GetMachineState().GetFilteredAvailableCPUSet(p.systemReservedCPUs, nil, state.CheckDedicatedNUMABinding) + + coreNumReservedForReclaim := p.dynamicConf.GetDynamicConfiguration().MinReclaimedResourceForAllocate[v1.ResourceCPU] + reservedForReclaim := machine.GetCoreNumReservedForReclaim(int(coreNumReservedForReclaim.Value()), p.metaServer.NumNUMANodes) + + reservedForReclaimInSharedNuma := 0 + sharedCoresNUMAs := p.state.GetMachineState().GetFilteredNUMASet(state.CheckNUMABinding) + for _, numaID := range sharedCoresNUMAs.ToSliceInt() { + reservedForReclaimInSharedNuma += reservedForReclaim[numaID] + } + + result := availableCPUSet.Size() - reservedForReclaimInSharedNuma + general.Infof("get shared pools limit: %v, availableCPUSet: %v, sharedCoresNUMAs:%v, reservedForReclaim: %v", + result, availableCPUSet.String(), sharedCoresNUMAs.String(), reservedForReclaim) + return result } // collectPoolLoad is specifically used for cpu-load in pool level, // and its upper-bound and lower-bound are calculated by pool size. 
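
accumulateSharedPoolsLimit above derives the shared-pool capacity as: available CPUs (machine CPUs minus system-reserved CPUs and minus NUMAs pinned by dedicated NUMA-binding pods) minus the cores reserved for reclaimed pods on the shared NUMAs. A simplified arithmetic sketch under assumed numbers; it ignores overlap between the reserved set and dedicated NUMAs, which the real code handles through CPU sets:

package main

import "fmt"

// sharedPoolsLimit mirrors the accounting in accumulateSharedPoolsLimit as plain
// arithmetic: what is left of the machine after dedicated NUMA-binding NUMAs,
// system-reserved cores, and the per-NUMA reservation for reclaimed pods on the
// remaining (shared) NUMAs.
func sharedPoolsLimit(machineCores, dedicatedNUMACores, systemReservedCores, reclaimReservedOnSharedNUMAs int) int {
	available := machineCores - dedicatedNUMACores - systemReservedCores
	return available - reclaimReservedOnSharedNUMAs
}

func main() {
	// Assumed example: 16 cores, one 4-core NUMA pinned by a dedicated pod,
	// no system reservation, and 1 core reserved for reclaim on each of the
	// 3 shared NUMAs -> a shared-pool limit of 9 cores.
	fmt.Println(sharedPoolsLimit(16, 4, 0, 3))
}

checkSharedPressureByPoolSize then reports pool-size pressure once the sum of the shared pool sizes reaches this limit.
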
-func (p *CPUPressureLoadEviction) collectPoolLoad(dynamicConfig *dynamic.Configuration, - metricName string, metricValue float64, poolEntry *state.AllocationInfo, collectTime int64) { - poolSize := poolEntry.AllocationResult.Size() +func (p *CPUPressureLoadEviction) collectPoolLoad(dynamicConfig *dynamic.Configuration, pressureByPoolSize bool, + metricName string, metricValue float64, poolName string, poolSize int, collectTime int64) { snapshot := &MetricSnapshot{ Info: MetricInfo{ Name: metricName, @@ -396,14 +449,37 @@ func (p *CPUPressureLoadEviction) collectPoolLoad(dynamicConfig *dynamic.Configu Time: collectTime, } - p.logPoolSnapShot(snapshot, poolEntry.OwnerPoolName, true) - p.pushMetric(dynamicConfig, metricName, poolEntry.OwnerPoolName, "", snapshot) + useAdvisedThreshold := p.checkPressureWithAdvisedThreshold() + if useAdvisedThreshold { + // it must not be triggered when it's healthy + lowerBound := metricValue + 1 + upperBound := lowerBound * dynamicConfig.LoadUpperBoundRatio + + if pressureByPoolSize { + // soft over must be triggered when it's under pressure + lowerBound = 1 + upperBound = float64(poolSize) * dynamicConfig.LoadUpperBoundRatio + } + snapshot.Info.LowerBound = lowerBound + snapshot.Info.UpperBound = upperBound + } + + _ = p.emitter.StoreInt64(metricNameCollectPoolLoadCalled, 1, metrics.MetricTypeNameCount, + metrics.ConvertMapToTags(map[string]string{ + metricsTagKeyPoolName: poolName, + metricsTagKeyMetricName: snapshot.Info.Name, + metricsTagKeyPressureByPoolSize: strconv.FormatBool(pressureByPoolSize), + metricsTagKeyAdvisedThreshold: strconv.FormatBool(useAdvisedThreshold), + })...) + + p.logPoolSnapShot(snapshot, poolName, true) + p.pushMetric(dynamicConfig, metricName, poolName, advisorapi.FakedContainerName, snapshot) } // collectPoolMetricDefault is a common collect in pool level, // and its upper-bound and lower-bound are not defined. 
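
The bound selection added to collectPoolLoad can be summarized as follows: with the static threshold, bounds derive from the pool size; with the advised (dynamic) threshold (enabled when checkPressureWithAdvisedThreshold, added later in this patch, returns true), the lower bound stays just above the observed load while the shared pools can still grow, and drops to 1 once they cannot, so soft-over is guaranteed under pool-size pressure. A standalone sketch of that decision, with parameter names chosen for illustration; the static-mode bounds are inferred from the expected values in the tests later in this patch:

package main

import "fmt"

// loadBounds sketches the bound selection performed by collectPoolLoad as a pure function.
func loadBounds(useAdvisedThreshold, pressureByPoolSize bool, metricValue float64, poolSize int, upperBoundRatio float64) (lower, upper float64) {
	if !useAdvisedThreshold {
		// static mode: bounds derive from the pool size
		return float64(poolSize), float64(poolSize) * upperBoundRatio
	}
	if pressureByPoolSize {
		// shared pools cannot grow any further: force soft-over
		return 1, float64(poolSize) * upperBoundRatio
	}
	// still healthy: keep the lower bound just above the observed value
	lower = metricValue + 1
	return lower, lower * upperBoundRatio
}

func main() {
	fmt.Println(loadBounds(true, false, 3.4, 8, 1.8)) // 4.4 7.92 (matches the "no pressure" test case below)
	fmt.Println(loadBounds(true, true, 3.4, 8, 1.8))  // 1 14.4 (matches the "has pressure" test case below)
}
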
-func (p *CPUPressureLoadEviction) collectPoolMetricDefault(dynamicConfig *dynamic.Configuration, - metricName string, metricValue float64, poolEntry *state.AllocationInfo, collectTime int64) { +func (p *CPUPressureLoadEviction) collectPoolMetricDefault(dynamicConfig *dynamic.Configuration, _ bool, + metricName string, metricValue float64, poolName string, _ int, collectTime int64) { snapshot := &MetricSnapshot{ Info: MetricInfo{ Name: metricName, @@ -412,10 +488,11 @@ func (p *CPUPressureLoadEviction) collectPoolMetricDefault(dynamicConfig *dynami Time: collectTime, } - p.logPoolSnapShot(snapshot, poolEntry.OwnerPoolName, false) - p.pushMetric(dynamicConfig, metricName, poolEntry.OwnerPoolName, "", snapshot) + p.logPoolSnapShot(snapshot, poolName, false) + p.pushMetric(dynamicConfig, metricName, poolName, advisorapi.FakedContainerName, snapshot) } +// pushMetric stores and push-in metric for the given pod func (p *CPUPressureLoadEviction) pushMetric(dynamicConfig *dynamic.Configuration, metricName, entryName, subEntryName string, snapshot *MetricSnapshot) { if p.metricsHistory[metricName] == nil { @@ -468,6 +545,7 @@ func (p *CPUPressureLoadEviction) logPoolSnapShot(snapshot *MetricSnapshot, pool } } +// clearEvictionPoolName resets pool in local memory func (p *CPUPressureLoadEviction) clearEvictionPoolName() { if p.evictionPoolName != advisorapi.FakedContainerName { general.Infof("clear eviction pool name: %s", p.evictionPoolName) @@ -475,14 +553,16 @@ func (p *CPUPressureLoadEviction) clearEvictionPoolName() { p.evictionPoolName = advisorapi.FakedContainerName } +// setEvictionPoolName sets pool in local memory func (p *CPUPressureLoadEviction) setEvictionPoolName(evictionPoolName string) { general.Infof("set eviction pool name: %s", evictionPoolName) p.evictionPoolName = evictionPoolName } +// getEvictionPoolName returns the previously-set pool func (p *CPUPressureLoadEviction) getEvictionPoolName() (exists bool, evictionPoolName string) { evictionPoolName = p.evictionPoolName - if evictionPoolName == "" { + if evictionPoolName == advisorapi.FakedContainerName { exists = false return } @@ -490,11 +570,12 @@ func (p *CPUPressureLoadEviction) getEvictionPoolName() (exists bool, evictionPo return } -func (p *CPUPressureLoadEviction) clearExpiredMetricsHistory(entries state.PodEntries) { +// clearExpiredMetricsHistory deletes the expired metric in local memory +func (p *CPUPressureLoadEviction) clearExpiredMetricsHistory(podPoolMap PodPoolMap) { for _, metricEntries := range p.metricsHistory { for entryName, subMetricEntries := range metricEntries { for subEntryName := range subMetricEntries { - if entries[entryName][subEntryName] == nil { + if podPoolMap[entryName][subEntryName] == nil { general.Infof("entryName: %s subEntryName: %s metric entry is expired, clear it", entryName, subEntryName) delete(subMetricEntries, subEntryName) @@ -509,6 +590,7 @@ func (p *CPUPressureLoadEviction) clearExpiredMetricsHistory(entries state.PodEn } } +// getMetricHistorySumForPod returns the accumulated value for the given pod func (p *CPUPressureLoadEviction) getMetricHistorySumForPod(metricName string, pod *v1.Pod) float64 { if pod == nil { return 0 @@ -524,3 +606,12 @@ func (p *CPUPressureLoadEviction) getMetricHistorySumForPod(metricName string, p } return ret } + +// checkPressureWithAdvisedThreshold returns if we should check pressure according to advisor. +// When enabled, plugin must make sure soft over is triggered when shared_core pools can't be expanded anymore. 
+func (p *CPUPressureLoadEviction) checkPressureWithAdvisedThreshold() bool { + // for now, we consider ReservedResourceForAllocate as downgrading or manual intervention configuration, + // when it's set to a value greater than zero, fall back to static threshold + reservedCoreNumForAllocate := p.dynamicConf.GetDynamicConfiguration().ReservedResourceForAllocate.Cpu() + return reservedCoreNumForAllocate.Value() == 0 +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go index a2afb098b..684f157c5 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_metric.go @@ -20,7 +20,6 @@ import ( "sync" advisorapi "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor" - "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" ) @@ -54,8 +53,8 @@ func (se SubEntries) IsPoolEntry() bool { // Entries are keyed by pod UID or pool name type Entries map[string]SubEntries -type PoolMetricCollectHandler func(dynamicConfig *dynamic.Configuration, - metricName string, metricValue float64, _ *state.AllocationInfo, collectTime int64) +type PoolMetricCollectHandler func(dynamicConfig *dynamic.Configuration, poolsUnderPressure bool, + metricName string, metricValue float64, poolName string, poolSize int, collectTime int64) func (ring *MetricRing) Sum() float64 { ring.RLock() diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go index 8751fa327..dc2c287a2 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_load_test.go @@ -19,6 +19,7 @@ package strategy import ( "context" "fmt" + "math" "os" "testing" "time" @@ -26,6 +27,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -51,6 +53,9 @@ const ( defaultCPUPressureEvictionPodGracePeriodSeconds = -1 defaultLoadUpperBoundRatio = 1.8 defaultLoadThresholdMetPercentage = 0.8 + defaultReservedForAllocate = "4" + defaultReservedForReclaim = "4" + defaultReservedForSystem = 0 ) func makeMetaServer(metricsFetcher metric.MetricsFetcher, cpuTopology *machine.CPUTopology) *metaserver.MetaServer { @@ -67,13 +72,20 @@ func makeMetaServer(metricsFetcher metric.MetricsFetcher, cpuTopology *machine.C } func makeConf(metricRingSize int, gracePeriod int64, loadUpperBoundRatio, - loadThresholdMetPercentage float64) *config.Configuration { + loadThresholdMetPercentage float64, reservedForReclaim, reservedForAllocate string, reservedForSystem int) *config.Configuration { conf := config.NewConfiguration() conf.GetDynamicConfiguration().EnableLoadEviction = true conf.GetDynamicConfiguration().LoadMetricRingSize = metricRingSize conf.GetDynamicConfiguration().LoadUpperBoundRatio = loadUpperBoundRatio conf.GetDynamicConfiguration().LoadThresholdMetPercentage = loadThresholdMetPercentage conf.GetDynamicConfiguration().CPUPressureEvictionConfiguration.GracePeriod = 
gracePeriod + conf.GetDynamicConfiguration().ReservedResourceForAllocate = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(reservedForAllocate), + } + conf.GetDynamicConfiguration().MinReclaimedResourceForAllocate = v1.ResourceList{ + v1.ResourceCPU: resource.MustParse(reservedForReclaim), + } + conf.ReservedCPUCores = reservedForSystem return conf } @@ -93,12 +105,13 @@ func TestNewCPUPressureLoadEviction(t *testing.T) { cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) as.Nil(err) conf := makeConf(defaultMetricRingSize, int64(defaultCPUPressureEvictionPodGracePeriodSeconds), - defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage) + defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage, defaultReservedForReclaim, defaultReservedForAllocate, defaultReservedForSystem) metaServer := makeMetaServer(metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}), cpuTopology) stateImpl, err := makeState(cpuTopology) as.Nil(err) - plugin := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + plugin, createPluginErr := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + as.Nil(createPluginErr) as.Nil(err) as.NotNil(plugin) } @@ -111,12 +124,13 @@ func TestThresholdMet(t *testing.T) { cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) as.Nil(err) conf := makeConf(defaultMetricRingSize, int64(defaultCPUPressureEvictionPodGracePeriodSeconds), - defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage) + defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage, defaultReservedForReclaim, defaultReservedForAllocate, defaultReservedForSystem) metaServer := makeMetaServer(metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}), cpuTopology) stateImpl, err := makeState(cpuTopology) as.Nil(err) - plugin := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + plugin, createPluginErr := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + as.Nil(createPluginErr) as.NotNil(plugin) pod1UID := string(uuid.NewUUID()) @@ -396,12 +410,13 @@ func TestGetTopEvictionPods(t *testing.T) { cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) as.Nil(err) conf := makeConf(defaultMetricRingSize, int64(defaultCPUPressureEvictionPodGracePeriodSeconds), - defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage) + defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage, defaultReservedForReclaim, defaultReservedForAllocate, defaultReservedForSystem) metaServer := makeMetaServer(metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}), cpuTopology) stateImpl, err := makeState(cpuTopology) as.Nil(err) - plugin := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + plugin, createPluginErr := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + as.Nil(createPluginErr) as.Nil(err) as.NotNil(plugin) @@ -785,3 +800,805 @@ func TestGetTopEvictionPods(t *testing.T) { }) } } + +func TestCPUPressureLoadEviction_collectMetrics(t *testing.T) { + t.Parallel() + + pod1UID := "pod1" + pod2UID := "pod2" + pod3UID := "pod3" + pod4UID := "pod4" + testName := "test" + + tests := []struct { + name string + reservedCPUForAllocate string + reservedCPUForReclaim string + reservedCPUForSystem int + podEntries qrmstate.PodEntries + loads map[string]map[string]float64 + wantSharedPoolSnapshots MetricInfo + }{ + { + name: "use default bound, without dedicated core pod", + reservedCPUForAllocate: "4", + 
reservedCPUForReclaim: "4", + podEntries: qrmstate.PodEntries{ + pod1UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod1UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("1,3-6,9,11-14"), + OriginalAllocationResult: machine.MustParse("1,3-6,9,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod2UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod2UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("1,3-6,9,11-14"), + OriginalAllocationResult: machine.MustParse("1,3-6,9,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod3UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod3UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.NewCPUSet(7, 8, 10, 15), + OriginalAllocationResult: machine.NewCPUSet(7, 8, 10, 15), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(8), + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(8), + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelReclaimedCores, + RequestQuantity: 2, + }, + }, + qrmstate.PoolNameShare: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameShare, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: 
machine.MustParse("1,3-6,9,11-14"), + OriginalAllocationResult: machine.MustParse("1,3-6,9,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(1, 9), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + }, + }, + qrmstate.PoolNameReclaim: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameReclaim, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.MustParse("7-8,10,15"), + OriginalAllocationResult: machine.MustParse("7-8,10,15"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(8), + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(8), + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + }, + }, + }, + loads: map[string]map[string]float64{ + pod1UID: { + testName: 1, + }, + pod2UID: { + testName: 1.4, + }, + pod3UID: { + testName: 5, + }, + pod4UID: { + testName: 8, + }, + }, + wantSharedPoolSnapshots: MetricInfo{ + Name: consts.MetricLoad1MinContainer, + Value: 2.4, + UpperBound: 18, + LowerBound: 10, + }, + }, + { + name: "use default bound, with dedicated core pod", + reservedCPUForAllocate: "4", + reservedCPUForReclaim: "4", + podEntries: qrmstate.PodEntries{ + pod1UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod1UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod2UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod2UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: 
apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod3UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod3UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.NewCPUSet(7, 10, 15), + OriginalAllocationResult: machine.NewCPUSet(7, 10, 15), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelReclaimedCores, + RequestQuantity: 2, + }, + }, + pod4UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod4UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameDedicated, + AllocationResult: machine.NewCPUSet(0-1, 8-9), + OriginalAllocationResult: machine.NewCPUSet(0-1, 8-9), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + RequestQuantity: 2, + }, + }, + + qrmstate.PoolNameShare: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameShare, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + }, + }, + qrmstate.PoolNameReclaim: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameReclaim, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.MustParse("7,10,15"), + OriginalAllocationResult: machine.MustParse("7,10,15"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + }, + }, + }, + loads: map[string]map[string]float64{ + pod1UID: { + testName: 1, + }, + pod2UID: { + testName: 2.4, + }, + pod3UID: { + testName: 5, + }, + pod4UID: { + testName: 8, + }, + }, 
+ wantSharedPoolSnapshots: MetricInfo{ + Name: consts.MetricLoad1MinContainer, + Value: 3.4, + UpperBound: 8 * 1.8, + LowerBound: 8, + }, + }, + { + name: "use dynamic bound, has pressure, with dedicated core pod", + reservedCPUForAllocate: "0", + reservedCPUForReclaim: "4", + reservedCPUForSystem: 4, + podEntries: qrmstate.PodEntries{ + pod1UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod1UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod2UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod2UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod3UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod3UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.NewCPUSet(7, 10, 15), + OriginalAllocationResult: machine.NewCPUSet(7, 10, 15), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelReclaimedCores, + RequestQuantity: 2, + }, + 
}, + pod4UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod4UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameDedicated, + AllocationResult: machine.NewCPUSet(0-1, 8-9), + OriginalAllocationResult: machine.NewCPUSet(0-1, 8-9), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + RequestQuantity: 2, + }, + }, + + qrmstate.PoolNameShare: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameShare, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + }, + }, + qrmstate.PoolNameReclaim: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameReclaim, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.MustParse("7,10,15"), + OriginalAllocationResult: machine.MustParse("7,10,15"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + }, + }, + }, + loads: map[string]map[string]float64{ + pod1UID: { + testName: 1, + }, + pod2UID: { + testName: 2.4, + }, + pod3UID: { + testName: 5, + }, + pod4UID: { + testName: 8, + }, + }, + wantSharedPoolSnapshots: MetricInfo{ + Name: consts.MetricLoad1MinContainer, + Value: 3.4, + LowerBound: 1, + UpperBound: 8 * 1.8, + }, + }, + { + name: "use dynamic bound, no pressure, with dedicated core pod", + reservedCPUForAllocate: "0", + reservedCPUForReclaim: "4", + reservedCPUForSystem: 0, + podEntries: qrmstate.PodEntries{ + pod1UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod1UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, 
+ Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod2UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod2UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelSharedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelSharedCores, + RequestQuantity: 2, + }, + }, + pod3UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod3UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.NewCPUSet(7, 10, 15), + OriginalAllocationResult: machine.NewCPUSet(7, 10, 15), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelReclaimedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelReclaimedCores, + RequestQuantity: 2, + }, + }, + pod4UID: qrmstate.ContainerEntries{ + testName: &qrmstate.AllocationInfo{ + PodUid: pod4UID, + PodNamespace: testName, + PodName: testName, + ContainerName: testName, + ContainerType: pluginapi.ContainerType_MAIN.String(), + ContainerIndex: 0, + RampUp: false, + OwnerPoolName: qrmstate.PoolNameDedicated, + AllocationResult: machine.NewCPUSet(0-1, 8-9), + OriginalAllocationResult: machine.NewCPUSet(0-1, 8-9), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(0, 1, 8, 9), + }, + Labels: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + Annotations: map[string]string{ + apiconsts.PodAnnotationQoSLevelKey: apiconsts.PodAnnotationQoSLevelDedicatedCores, + }, + QoSLevel: apiconsts.PodAnnotationQoSLevelDedicatedCores, + RequestQuantity: 2, + }, + }, + + qrmstate.PoolNameShare: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameShare, + OwnerPoolName: qrmstate.PoolNameShare, + AllocationResult: machine.MustParse("3-6,11-14"), + OriginalAllocationResult: machine.MustParse("3-6,11-14"), + 
TopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 0: machine.NewCPUSet(), + 1: machine.NewCPUSet(3, 11), + 2: machine.NewCPUSet(4, 5, 11, 12), + 3: machine.NewCPUSet(6, 14), + }, + }, + }, + qrmstate.PoolNameReclaim: qrmstate.ContainerEntries{ + "": &qrmstate.AllocationInfo{ + PodUid: qrmstate.PoolNameReclaim, + OwnerPoolName: qrmstate.PoolNameReclaim, + AllocationResult: machine.MustParse("7,10,15"), + OriginalAllocationResult: machine.MustParse("7,10,15"), + TopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + OriginalTopologyAwareAssignments: map[int]machine.CPUSet{ + 1: machine.NewCPUSet(10), + 3: machine.NewCPUSet(7, 15), + }, + }, + }, + }, + loads: map[string]map[string]float64{ + pod1UID: { + testName: 1, + }, + pod2UID: { + testName: 2.4, + }, + pod3UID: { + testName: 5, + }, + pod4UID: { + testName: 8, + }, + }, + wantSharedPoolSnapshots: MetricInfo{ + Name: consts.MetricLoad1MinContainer, + Value: 3.4, + LowerBound: 4.4, + UpperBound: 4.4 * 1.8, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + as := require.New(t) + + cpuTopology, err := machine.GenerateDummyCPUTopology(16, 2, 4) + as.Nil(err) + conf := makeConf(defaultMetricRingSize, int64(defaultCPUPressureEvictionPodGracePeriodSeconds), + defaultLoadUpperBoundRatio, defaultLoadThresholdMetPercentage, tt.reservedCPUForReclaim, tt.reservedCPUForAllocate, tt.reservedCPUForSystem) + stateImpl, err := makeState(cpuTopology) + as.Nil(err) + + fakeMetricsFetcher := metric.NewFakeMetricsFetcher(metrics.DummyMetrics{}).(*metric.FakeMetricsFetcher) + metaServer := makeMetaServer(fakeMetricsFetcher, cpuTopology) + assert.NotNil(t, fakeMetricsFetcher) + + now := time.Now() + for entryName, entries := range tt.podEntries { + for subEntryName, entry := range entries { + stateImpl.SetAllocationInfo(entryName, subEntryName, entry) + + if entries.IsPoolEntry() { + continue + } + + curLoad, found := tt.loads[entryName][subEntryName] + as.True(found) + fakeMetricsFetcher.SetContainerMetric(entryName, subEntryName, consts.MetricLoad1MinContainer, utilmetric.MetricData{Value: curLoad, Time: &now}) + } + } + + plugin, createPluginErr := NewCPUPressureLoadEviction(metrics.DummyMetrics{}, metaServer, conf, stateImpl) + as.Nil(createPluginErr) + as.Nil(err) + as.NotNil(plugin) + p := plugin.(*CPUPressureLoadEviction) + p.collectMetrics(context.TODO()) + metricRing := p.metricsHistory[consts.MetricLoad1MinContainer][qrmstate.PoolNameShare][""] + + snapshot := metricRing.Queue[metricRing.CurrentIndex] + as.True(math.Abs(tt.wantSharedPoolSnapshots.Value-snapshot.Info.Value) < 0.01) + as.True(math.Abs(tt.wantSharedPoolSnapshots.UpperBound-snapshot.Info.UpperBound) < 0.01) + as.True(math.Abs(tt.wantSharedPoolSnapshots.LowerBound-snapshot.Info.LowerBound) < 0.01) + }) + } +} diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go index 0ea245bd8..181131c8c 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpueviction/strategy/pressure_suppression.go @@ -39,13 +39,13 @@ import ( type CPUPressureSuppression struct { conf *config.Configuration - state 
state.State
+	state             state.ReadonlyState
 
 	lastToleranceTime sync.Map
 }
 
 func NewCPUPressureSuppressionEviction(_ metrics.MetricEmitter, _ *metaserver.MetaServer,
-	conf *config.Configuration, state state.State) CPUPressureForceEviction {
+	conf *config.Configuration, state state.ReadonlyState) CPUPressureForceEviction {
 	return &CPUPressureSuppression{
 		conf:  conf,
 		state: state,
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
index 8d6c60252..5192ca76f 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
@@ -127,15 +127,12 @@ type DynamicPolicy struct {
 func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration,
 	_ interface{}, agentName string) (bool, agent.Component, error) {
-	allCPUs := agentCtx.CPUDetails.CPUs().Clone()
-	reservedCPUsNum := conf.ReservedCPUCores
-	reservedCPUs, _, reserveErr := calculator.TakeHTByNUMABalance(agentCtx.KatalystMachineInfo, allCPUs, reservedCPUsNum)
+	reservedCPUs, reserveErr := util.GetCoresReservedForSystem(conf, agentCtx.KatalystMachineInfo, agentCtx.CPUDetails.CPUs().Clone())
 	if reserveErr != nil {
-		return false, agent.ComponentStub{}, fmt.Errorf("takeByNUMABalance for reservedCPUsNum: %d failed with error: %v",
+		return false, agent.ComponentStub{}, fmt.Errorf("GetCoresReservedForSystem for reservedCPUsNum: %d failed with error: %v",
 			conf.ReservedCPUCores, reserveErr)
 	}
-	general.Infof("take reservedCPUs: %s by reservedCPUsNum: %d", reservedCPUs.String(), reservedCPUsNum)
 
 	stateImpl, stateErr := state.NewCheckpointState(conf.GenericQRMPluginConfiguration.StateFileDirectory, cpuPluginStateFileName,
 		cpuutil.CPUResourcePluginPolicyNameDynamic, agentCtx.CPUTopology, conf.SkipCPUStateCorruption)
diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go
index 7f61c739e..7470f9dac 100644
--- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go
+++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go
@@ -391,6 +391,19 @@ func (ns *NUMANodeState) GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA
 	return res
 }
 
+// ExistMatchedAllocationInfo returns true if the given predicate holds for at least one allocation on this NUMA node; otherwise it returns false.
+func (ns *NUMANodeState) ExistMatchedAllocationInfo(f func(ai *AllocationInfo) bool) bool {
+	for _, containerEntries := range ns.PodEntries {
+		for _, allocationInfo := range containerEntries {
+			if f(allocationInfo) {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
 func (ns *NUMANodeState) SetAllocationInfo(podUID string, containerName string, allocationInfo *AllocationInfo) {
 	if ns == nil {
 		return
@@ -435,6 +448,18 @@ func (nm NUMANodeMap) GetFilteredAvailableCPUSet(reservedCPUs machine.CPUSet,
 	return nm.GetFilteredDefaultCPUSet(excludeEntry, excludeWholeNUMA).Difference(reservedCPUs)
 }
 
+// GetFilteredNUMASet returns the set of NUMA IDs, excluding any NUMA node for which the predicate matches some allocation.
+func (nm NUMANodeMap) GetFilteredNUMASet(excludeNUMAPredicate func(ai *AllocationInfo) bool) machine.CPUSet { + res := machine.NewCPUSet() + for numaID, numaNodeState := range nm { + if numaNodeState.ExistMatchedAllocationInfo(excludeNUMAPredicate) { + continue + } + res.Add(numaID) + } + return res +} + func (nm NUMANodeMap) Clone() NUMANodeMap { if nm == nil { return nil diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go index 8b538ef07..50e35423c 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/util.go @@ -18,6 +18,7 @@ package state import ( "fmt" + "strings" "sync" "k8s.io/apimachinery/pkg/util/sets" @@ -206,3 +207,7 @@ func GenerateMachineStateFromPodEntries(topology *machine.CPUTopology, podEntrie } return machineState, nil } + +func IsIsolationPool(poolName string) bool { + return strings.HasPrefix(poolName, PoolNameIsolation) +} diff --git a/pkg/agent/qrm-plugins/util/util.go b/pkg/agent/qrm-plugins/util/util.go index ac2ae2a9e..a41e61f4f 100644 --- a/pkg/agent/qrm-plugins/util/util.go +++ b/pkg/agent/qrm-plugins/util/util.go @@ -29,7 +29,9 @@ import ( pluginapi "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" "github.com/kubewharf/katalyst-api/pkg/consts" + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/calculator" "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state" + "github.com/kubewharf/katalyst-core/pkg/config" "github.com/kubewharf/katalyst-core/pkg/config/generic" "github.com/kubewharf/katalyst-core/pkg/util/general" "github.com/kubewharf/katalyst-core/pkg/util/machine" @@ -313,6 +315,18 @@ func GetContainerAsyncWorkName(podUID, containerName, topic string) string { return strings.Join([]string{podUID, containerName, topic}, "/") } +func GetCoresReservedForSystem(conf *config.Configuration, machineInfo *machine.KatalystMachineInfo, allCPUs machine.CPUSet) (machine.CPUSet, error) { + systemReservedNum := conf.ReservedCPUCores + reservedCPUs, _, reserveErr := calculator.TakeHTByNUMABalance(machineInfo, allCPUs, systemReservedNum) + if reserveErr != nil { + return reservedCPUs, fmt.Errorf("takeByNUMABalance for reservedCPUsNum: %d failed with error: %v", + systemReservedNum, reserveErr) + } + + general.Infof("take reservedCPUs: %s by reservedCPUsNum: %d", reservedCPUs.String(), systemReservedNum) + return reservedCPUs, nil +} + // RegenerateHints regenerates hints for container that'd already been allocated cpu, // and regenerateHints will assemble hints based on already-existed AllocationInfo, // without any calculation logics at all diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go index 30f270dc2..40a14094f 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor.go @@ -23,6 +23,7 @@ import ( "sync" "time" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" @@ -117,7 +118,8 @@ func NewCPUResourceAdvisor(conf *config.Configuration, extraConf interface{}, me emitter: emitter, } - cra.initializeReservedForReclaim() + coreNumReservedForReclaim := conf.DynamicAgentConfiguration.GetDynamicConfiguration().MinReclaimedResourceForAllocate[v1.ResourceCPU] + cra.reservedForReclaim = 
machine.GetCoreNumReservedForReclaim(int(coreNumReservedForReclaim.Value()), metaServer.KatalystMachineInfo.NumNUMANodes) if err := cra.initializeProvisionAssembler(); err != nil { klog.Errorf("[qosaware-cpu] initialize provision assembler failed: %v", err) diff --git a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go index d9765bc85..a419adc78 100644 --- a/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go +++ b/pkg/agent/sysadvisor/plugin/qosaware/resource/cpu/advisor_helper.go @@ -134,32 +134,6 @@ func (cra *cpuResourceAdvisor) initializeHeadroomAssembler() error { return nil } -// initializeReservedForReclaim generates per numa reserved for reclaim resource value map. -// per numa reserved resource is taken in a fair way with even step, e.g. -// 4 -> 1 1 1 1; 2 -> 1 0 1 0 -func (cra *cpuResourceAdvisor) initializeReservedForReclaim() { - reservedTotal := types.ReservedForReclaim - numNumaNodes := cra.metaServer.NumNUMANodes - reservedPerNuma := reservedTotal / numNumaNodes - step := numNumaNodes / reservedTotal - - if reservedPerNuma < 1 { - reservedPerNuma = 1 - } - if step < 1 { - step = 1 - } - - cra.reservedForReclaim = make(map[int]int) - for id := 0; id < numNumaNodes; id++ { - if id%step == 0 { - cra.reservedForReclaim[id] = reservedPerNuma - } else { - cra.reservedForReclaim[id] = 0 - } - } -} - // updateNumasAvailableResource updates available resource of all numa nodes. // available = total - reserved pool - reserved for reclaim func (cra *cpuResourceAdvisor) updateNumasAvailableResource() { diff --git a/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go b/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go index ee828c118..e9f5f7e01 100644 --- a/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go +++ b/pkg/config/agent/dynamic/adminqos/reclaimedresource/reclaimedresource_base.go @@ -25,10 +25,11 @@ import ( ) type ReclaimedResourceConfiguration struct { - EnableReclaim bool - ReservedResourceForReport v1.ResourceList - MinReclaimedResourceForReport v1.ResourceList - ReservedResourceForAllocate v1.ResourceList + EnableReclaim bool + ReservedResourceForReport v1.ResourceList + MinReclaimedResourceForReport v1.ResourceList + ReservedResourceForAllocate v1.ResourceList + MinReclaimedResourceForAllocate v1.ResourceList *cpuheadroom.CPUHeadroomConfiguration *memoryheadroom.MemoryHeadroomConfiguration @@ -65,6 +66,12 @@ func (c *ReclaimedResourceConfiguration) ApplyConfiguration(conf *crd.DynamicCon c.ReservedResourceForAllocate[resourceName] = value } } + + if config.MinReclaimedResourceForAllocate != nil { + for resourceName, value := range *config.MinReclaimedResourceForAllocate { + c.MinReclaimedResourceForAllocate[resourceName] = value + } + } } c.CPUHeadroomConfiguration.ApplyConfiguration(conf) diff --git a/pkg/util/machine/cpu.go b/pkg/util/machine/cpu.go index 43be89697..72845850f 100644 --- a/pkg/util/machine/cpu.go +++ b/pkg/util/machine/cpu.go @@ -73,3 +73,29 @@ func getCPUInstructionInfo(cpuInfo string) sets.String { return supportInstructionSet } + +// GetCoreNumReservedForReclaim generates per numa reserved for reclaim resource value map. +// per numa reserved resource is taken in a fair way with even step, e.g. 
+// 4 -> 1 1 1 1; 2 -> 1 0 1 0
+func GetCoreNumReservedForReclaim(numReservedCores, numNumaNodes int) map[int]int {
+	reservedPerNuma := numReservedCores / numNumaNodes
+	step := numNumaNodes / numReservedCores
+
+	if reservedPerNuma < 1 {
+		reservedPerNuma = 1
+	}
+	if step < 1 {
+		step = 1
+	}
+
+	reservedForReclaim := make(map[int]int)
+	for id := 0; id < numNumaNodes; id++ {
+		if id%step == 0 {
+			reservedForReclaim[id] = reservedPerNuma
+		} else {
+			reservedForReclaim[id] = 0
+		}
+	}
+
+	return reservedForReclaim
+}
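
GetCoreNumReservedForReclaim above spreads the reclaim reservation across NUMA nodes with an even step, as the "4 -> 1 1 1 1; 2 -> 1 0 1 0" example in its doc comment describes. The following minimal sketch is not part of the patch; it only exercises the helper against the patched pkg/util/machine package, and it assumes both arguments are at least 1 (the helper divides by numReservedCores and numNumaNodes).

    package main

    import (
    	"fmt"

    	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    )

    func main() {
    	// 4 reserved cores over 4 NUMA nodes: every node reserves one core for reclaim.
    	fmt.Println(machine.GetCoreNumReservedForReclaim(4, 4)) // map[0:1 1:1 2:1 3:1]

    	// 2 reserved cores over 4 NUMA nodes: every other node reserves one core.
    	fmt.Println(machine.GetCoreNumReservedForReclaim(2, 4)) // map[0:1 1:0 2:1 3:0]
    }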
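
The new NUMANodeState.ExistMatchedAllocationInfo and NUMANodeMap.GetFilteredNUMASet helpers let callers drop whole NUMA nodes from a calculation based on a per-allocation predicate. The sketch below shows one way such a filter could look; the helper name numaSetWithoutDedicatedPods and the specific predicate (excluding NUMA nodes that host dedicated_cores pods) are illustrative assumptions, not necessarily the exact condition pressure_load.go uses for its dynamic threshold.

    package main

    import (
    	"fmt"

    	apiconsts "github.com/kubewharf/katalyst-api/pkg/consts"

    	qrmstate "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
    	"github.com/kubewharf/katalyst-core/pkg/util/machine"
    )

    // numaSetWithoutDedicatedPods returns the NUMA IDs that hold no dedicated_cores pod;
    // any NUMA node with at least one matching allocation is filtered out.
    func numaSetWithoutDedicatedPods(machineState qrmstate.NUMANodeMap) machine.CPUSet {
    	return machineState.GetFilteredNUMASet(func(ai *qrmstate.AllocationInfo) bool {
    		return ai != nil && ai.QoSLevel == apiconsts.PodAnnotationQoSLevelDedicatedCores
    	})
    }

    func main() {
    	// machineState would normally come from state.GenerateMachineStateFromPodEntries;
    	// an empty map simply yields an empty NUMA set here.
    	machineState := qrmstate.NUMANodeMap{}
    	fmt.Println(numaSetWithoutDedicatedPods(machineState).String())
    }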