From 132d12c390931a17a4eb36290b6c776072cbc439 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E9=B8=BF=E6=96=8C?= Date: Mon, 23 Oct 2023 20:23:06 +0800 Subject: [PATCH] feat(eviction): system free memory threshold minimum --- .../adminqos/eviction/memory_pressure_eviction.go | 10 ++++++++++ go.mod | 2 +- go.sum | 4 ++-- .../plugin/memory/numa_pressure_test.go | 5 +++-- .../evictionmanager/plugin/memory/system_pressure.go | 10 +++++++--- .../plugin/memory/system_pressure_test.go | 4 +++- .../adminqos/eviction/memory_pressure_eviction.go | 7 +++++++ 7 files changed, 33 insertions(+), 9 deletions(-) diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/memory_pressure_eviction.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/memory_pressure_eviction.go index 878f10683..da4e601ae 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/memory_pressure_eviction.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/memory_pressure_eviction.go @@ -17,6 +17,7 @@ limitations under the License. 
package eviction import ( + "k8s.io/apimachinery/pkg/api/resource" cliflag "k8s.io/component-base/cli/flag" "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/eviction" @@ -28,6 +29,7 @@ type MemoryPressureEvictionOptions struct { EnableSystemLevelEviction bool NumaVictimMinimumUtilizationThreshold float64 NumaFreeBelowWatermarkTimesThreshold int + SystemFreeMemoryThresholdMinimum string SystemKswapdRateThreshold int SystemKswapdRateExceedDurationThreshold int NumaEvictionRankingMetrics []string @@ -44,6 +46,7 @@ func NewMemoryPressureEvictionOptions() *MemoryPressureEvictionOptions { EnableSystemLevelEviction: eviction.DefaultEnableSystemLevelEviction, NumaVictimMinimumUtilizationThreshold: eviction.DefaultNumaVictimMinimumUtilizationThreshold, NumaFreeBelowWatermarkTimesThreshold: eviction.DefaultNumaFreeBelowWatermarkTimesThreshold, + SystemFreeMemoryThresholdMinimum: eviction.DefaultSystemFreeMemoryThresholdMinimum, SystemKswapdRateThreshold: eviction.DefaultSystemKswapdRateThreshold, SystemKswapdRateExceedDurationThreshold: eviction.DefaultSystemKswapdRateExceedDurationThreshold, NumaEvictionRankingMetrics: eviction.DefaultNumaEvictionRankingMetrics, @@ -66,6 +69,8 @@ func (o *MemoryPressureEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) { "the threshold for the victim's minimum memory utilization on a NUMA node") fs.IntVar(&o.NumaFreeBelowWatermarkTimesThreshold, "eviction-numa-free-below-watermark-times-threshold", o.NumaFreeBelowWatermarkTimesThreshold, "the threshold for the number of times NUMA's free memory falls below the watermark") + fs.StringVar(&o.SystemFreeMemoryThresholdMinimum, "eviction-system-free-memory-threshold-minimum", o.SystemFreeMemoryThresholdMinimum, + "the minimum of free memory threshold,it should be a string can be parsed to a quantity, e.g. 
10Gi,20Ki") fs.IntVar(&o.SystemKswapdRateThreshold, "eviction-system-kswapd-rate-threshold", o.SystemKswapdRateThreshold, "the threshold for the rate of kswapd reclaiming rate") fs.IntVar(&o.SystemKswapdRateExceedDurationThreshold, "eviction-system-kswapd-rate-exceed-duration-threshold", o.SystemKswapdRateExceedDurationThreshold, @@ -88,6 +93,11 @@ func (o *MemoryPressureEvictionOptions) ApplyTo(c *eviction.MemoryPressureEvicti c.EnableSystemLevelEviction = o.EnableSystemLevelEviction c.NumaVictimMinimumUtilizationThreshold = o.NumaVictimMinimumUtilizationThreshold c.NumaFreeBelowWatermarkTimesThreshold = o.NumaFreeBelowWatermarkTimesThreshold + quantity, err := resource.ParseQuantity(o.SystemFreeMemoryThresholdMinimum) + if err != nil { + return err + } + c.SystemFreeMemoryThresholdMinimum = quantity.Value() c.SystemKswapdRateThreshold = o.SystemKswapdRateThreshold c.SystemKswapdRateExceedDurationThreshold = o.SystemKswapdRateExceedDurationThreshold c.NumaEvictionRankingMetrics = o.NumaEvictionRankingMetrics diff --git a/go.mod b/go.mod index 3c11e270f..6ce9debc3 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/golang/protobuf v1.5.2 github.com/google/cadvisor v0.44.1 - github.com/kubewharf/katalyst-api v0.1.17-0.20231026032433-74962f058620 + github.com/kubewharf/katalyst-api v0.1.17-0.20231026110318-f28161d56ba5 github.com/montanaflynn/stats v0.7.1 github.com/opencontainers/runc v1.1.1 github.com/pkg/errors v0.9.1 diff --git a/go.sum b/go.sum index 14d3d6736..9b4ca290a 100644 --- a/go.sum +++ b/go.sum @@ -543,8 +543,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.1.17-0.20231026032433-74962f058620 
h1:o+DZQVvq6foR/GszZPrwWvx1rTfaOf+Cm0zEzk+FuPM= -github.com/kubewharf/katalyst-api v0.1.17-0.20231026032433-74962f058620/go.mod h1:iVILS5UL5PRtkUPH2Iu1K/gFGTPMNItnth5fmQ80VGE= +github.com/kubewharf/katalyst-api v0.1.17-0.20231026110318-f28161d56ba5 h1:zsWm5Jg4rdDFpXb8UvjOiqa5iriabV8/+UKjY9qtdo8= +github.com/kubewharf/katalyst-api v0.1.17-0.20231026110318-f28161d56ba5/go.mod h1:iVILS5UL5PRtkUPH2Iu1K/gFGTPMNItnth5fmQ80VGE= github.com/kubewharf/kubelet v1.24.6-kubewharf.7 h1:zex5NjgWh3b+fk8sey5Hp/hOVoSKdqf4mJu8MeE8T4k= github.com/kubewharf/kubelet v1.24.6-kubewharf.7/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= diff --git a/pkg/agent/evictionmanager/plugin/memory/numa_pressure_test.go b/pkg/agent/evictionmanager/plugin/memory/numa_pressure_test.go index 0c74a1786..56866530e 100644 --- a/pkg/agent/evictionmanager/plugin/memory/numa_pressure_test.go +++ b/pkg/agent/evictionmanager/plugin/memory/numa_pressure_test.go @@ -39,7 +39,8 @@ import ( ) var ( - numaTotalMap = []float64{50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024} + numaTotalMap = []float64{50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024, 50 * 1024 * 1024 * 1024} + numaScaleFactor = 600 ) func makeNumaPressureEvictionPlugin(conf *config.Configuration) (*NumaMemoryPressurePlugin, error) { @@ -85,7 +86,7 @@ func TestNumaMemoryPressurePlugin_ThresholdMet(t *testing.T) { assert.NotNil(t, fakeMetricsFetcher) now := time.Now() - fakeMetricsFetcher.SetNodeMetric(consts.MetricMemScaleFactorSystem, utilMetric.MetricData{Value: float64(scaleFactor), Time: &now}) + fakeMetricsFetcher.SetNodeMetric(consts.MetricMemScaleFactorSystem, utilMetric.MetricData{Value: float64(numaScaleFactor), Time: &now}) for numaID, numaTotal := range numaTotalMap { fakeMetricsFetcher.SetNumaMetric(numaID, consts.MetricMemTotalNuma, utilMetric.MetricData{Value: 
numaTotal, Time: &now}) } diff --git a/pkg/agent/evictionmanager/plugin/memory/system_pressure.go b/pkg/agent/evictionmanager/plugin/memory/system_pressure.go index 086f32f1a..f9dccdab6 100644 --- a/pkg/agent/evictionmanager/plugin/memory/system_pressure.go +++ b/pkg/agent/evictionmanager/plugin/memory/system_pressure.go @@ -19,6 +19,7 @@ package memory import ( "context" "fmt" + "math" "strconv" "sync" "time" @@ -172,11 +173,14 @@ func (s *SystemPressureEvictionPlugin) detectSystemWatermarkPressure() { return } + thresholdMinimum := float64(s.dynamicConfig.GetDynamicConfiguration().SystemFreeMemoryThresholdMinimum) + threshold := math.Max(thresholdMinimum, total*scaleFactor/10000) + general.Infof("system watermark metrics, "+ - "free: %+v, total: %+v, scaleFactor: %+v", - free, total, scaleFactor) + "free: %+v, total: %+v, scaleFactor: %+v, configuration minimum: %+v, final threshold: %+v", + free, total, scaleFactor, thresholdMinimum, threshold) - if free < total*scaleFactor/10000 { + if free < threshold { s.isUnderSystemPressure = true s.systemAction = actionReclaimedEviction } diff --git a/pkg/agent/evictionmanager/plugin/memory/system_pressure_test.go b/pkg/agent/evictionmanager/plugin/memory/system_pressure_test.go index 7546f70aa..2a0649331 100644 --- a/pkg/agent/evictionmanager/plugin/memory/system_pressure_test.go +++ b/pkg/agent/evictionmanager/plugin/memory/system_pressure_test.go @@ -49,12 +49,13 @@ var ( evictionManagerSyncPeriod = 10 * time.Second numaFreeBelowWatermarkTimesThreshold = 3 numaVictimMinimumUsageThreshold = 0.001 + systemFreeMemoryThresholdMinimum = int64(5 * 1024 * 1024 * 1024) systemKswapdRateThreshold = 1000 systemKswapdRateExceedDurationThreshold = 90 systemPluginSyncPeriod = 30 systemPluginCoolDownPeriod = 40 - scaleFactor = 600 + scaleFactor = 100 systemTotal = 100 * 1024 * 1024 * 1024 highPriority int32 = 100000 @@ -75,6 +76,7 @@ func makeConf() *config.Configuration { conf.GetDynamicConfiguration().NumaEvictionRankingMetrics = 
evictionconfig.DefaultNumaEvictionRankingMetrics conf.GetDynamicConfiguration().SystemEvictionRankingMetrics = evictionconfig.DefaultSystemEvictionRankingMetrics conf.GetDynamicConfiguration().MemoryPressureEvictionConfiguration.GracePeriod = evictionconfig.DefaultGracePeriod + conf.GetDynamicConfiguration().SystemFreeMemoryThresholdMinimum = systemFreeMemoryThresholdMinimum return conf } diff --git a/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go b/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go index 2caa3ba33..a60d43940 100644 --- a/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go +++ b/pkg/config/agent/dynamic/adminqos/eviction/memory_pressure_eviction.go @@ -39,6 +39,8 @@ const ( // DefaultNumaFreeBelowWatermarkTimesThreshold is the default threshold for the number of times // that NUMA's free memory falls below the watermark DefaultNumaFreeBelowWatermarkTimesThreshold = 4 + // DefaultSystemFreeMemoryThresholdMinimum is the default lower bound of the system free memory threshold, given as a quantity string (e.g. 10Gi).
+ DefaultSystemFreeMemoryThresholdMinimum = "10Gi" // DefaultSystemKswapdRateThreshold is the default threshold for the rate of kswapd reclaiming rate DefaultSystemKswapdRateThreshold = 2000 // DefaultSystemKswapdRateExceedTimesThreshold is the default threshold for the number of times @@ -66,6 +68,7 @@ type MemoryPressureEvictionConfiguration struct { EnableSystemLevelEviction bool NumaVictimMinimumUtilizationThreshold float64 NumaFreeBelowWatermarkTimesThreshold int + SystemFreeMemoryThresholdMinimum int64 SystemKswapdRateThreshold int SystemKswapdRateExceedDurationThreshold int NumaEvictionRankingMetrics []string @@ -100,6 +103,10 @@ func (c *MemoryPressureEvictionConfiguration) ApplyConfiguration(conf *crd.Dynam c.NumaFreeBelowWatermarkTimesThreshold = *(config.NumaFreeBelowWatermarkTimesThreshold) } + if config.SystemFreeMemoryThresholdMinimum != nil { + c.SystemFreeMemoryThresholdMinimum = config.SystemFreeMemoryThresholdMinimum.Value() + } + if config.SystemKswapdRateThreshold != nil { c.SystemKswapdRateThreshold = *(config.SystemKswapdRateThreshold) }