From 040060d10a3959b57c950bab7bbb69dad135bfe9 Mon Sep 17 00:00:00 2001 From: linzhecheng Date: Thu, 18 Jan 2024 20:54:06 +0800 Subject: [PATCH] refactor(eviction): support config eviction tolerance duration Signed-off-by: linzhecheng --- .../eviction/reclaimed_resources_eviction.go | 13 ++++-- go.mod | 1 + go.sum | 4 +- .../plugin/resource/reclaimed_resources.go | 11 +++-- .../plugin/resource/resources.go | 41 ++++++++++--------- .../eviction/reclaimed_resources_eviction.go | 11 +++-- .../agent-healthz/helper/taint_cnr.go | 2 +- 7 files changed, 51 insertions(+), 32 deletions(-) diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go index bad84937b1..c88c5483ed 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go @@ -25,8 +25,9 @@ import ( ) type ReclaimedResourcesEvictionOptions struct { - EvictionThreshold native.ResourceThreshold - GracePeriod int64 + EvictionThreshold native.ResourceThreshold + GracePeriod int64 + ThresholdMetToleranceDuration int64 } func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions { @@ -35,7 +36,8 @@ func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions { consts.ReclaimedResourceMilliCPU: 5.0, consts.ReclaimedResourceMemory: 5.0, }, - GracePeriod: 60, + GracePeriod: 60, + ThresholdMetToleranceDuration: 0, } } @@ -46,10 +48,13 @@ func (o *ReclaimedResourcesEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) "the threshold rate for best effort resources") fs.Int64Var(&o.GracePeriod, "eviction-reclaimed-resources-grace-period", o.GracePeriod, "the graceful eviction period (in seconds) for reclaimed pods") + fs.Int64Var(&o.ThresholdMetToleranceDuration, 
"eviction-reclaimed-resources-threshold-met-tolerance-Duration", + o.ThresholdMetToleranceDuration, "the tolerance duration before eviction.") } func (o *ReclaimedResourcesEvictionOptions) ApplyTo(c *eviction.ReclaimedResourcesEvictionConfiguration) error { c.EvictionThreshold = o.EvictionThreshold - c.GracePeriod = o.GracePeriod + c.DeletionGracePeriod = o.GracePeriod + c.ThresholdMetToleranceDuration = o.ThresholdMetToleranceDuration return nil } diff --git a/go.mod b/go.mod index 7d11f436ae..cd25124d5f 100644 --- a/go.mod +++ b/go.mod @@ -146,6 +146,7 @@ require ( ) replace ( + github.com/kubewharf/katalyst-api => github.com/cheney-lin/katalyst-api v0.0.0-20240118123235-2bbb9e0a2982 k8s.io/api => k8s.io/api v0.24.6 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6 k8s.io/apimachinery => k8s.io/apimachinery v0.24.6 diff --git a/go.sum b/go.sum index 8f3ff800d3..fe3e88e2ec 100644 --- a/go.sum +++ b/go.sum @@ -137,6 +137,8 @@ github.com/cespare/xxhash/v2 v2.1.2 h1:YRXhKfTDauu4ajMg1TPgFO5jnlC2HCbmLXMcTG5cb github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5/go.mod h1:/iP1qXHoty45bqomnu2LM+VVyAEdWN+vtSHGlQgyxbw= github.com/checkpoint-restore/go-criu/v5 v5.3.0/go.mod h1:E/eQpaFtUKGOOSEBZgmKAcn+zUUwWxqcaKZlF54wK8E= +github.com/cheney-lin/katalyst-api v0.0.0-20240118123235-2bbb9e0a2982 h1:1ti/aEUmSZiFabnmQkNY+h/wc/wHrrhXAxyKmmiy3fs= +github.com/cheney-lin/katalyst-api v0.0.0-20240118123235-2bbb9e0a2982/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -546,8 +548,6 @@ github.com/kr/pty v1.1.1/go.mod 
h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.4.0 h1:IvAjHiPNzsPZSNZxXHUZgUZW3H32/V6HgDzhmgTklYE= -github.com/kubewharf/katalyst-api v0.4.0/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo= github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4= github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= diff --git a/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go b/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go index 93afb5f80e..2a70ba30e1 100644 --- a/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go @@ -61,8 +61,12 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev } } - gracePeriodGetter := func() int64 { - return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.GracePeriod + deletionGracePeriodGetter := func() int64 { + return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.DeletionGracePeriod + } + thresholdMetToleranceDuration := func() int64 { + //return conf.GetDynamicConfiguration().ReclaimedResourceConfiguration. 
+ return 0 } p := NewResourcesEvictionPlugin( @@ -71,7 +75,8 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev emitter, reclaimedResourcesGetter, reclaimedThresholdGetter, - gracePeriodGetter, + deletionGracePeriodGetter, + thresholdMetToleranceDuration, conf.SkipZeroQuantityResourceNames, conf.CheckReclaimedQoSForPod, ) diff --git a/pkg/agent/evictionmanager/plugin/resource/resources.go b/pkg/agent/evictionmanager/plugin/resource/resources.go index d7a23228f6..e00715fb4e 100644 --- a/pkg/agent/evictionmanager/plugin/resource/resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/resources.go @@ -51,9 +51,10 @@ type ResourcesEvictionPlugin struct { emitter metrics.MetricEmitter // thresholdGetter is used to get the threshold of resources. - thresholdGetter ThresholdGetter - resourcesGetter ResourcesGetter - gracePeriodGetter GracePeriodGetter + thresholdGetter ThresholdGetter + resourcesGetter ResourcesGetter + deletionGracePeriodGetter GracePeriodGetter + thresholdMetToleranceDurationGetter GracePeriodGetter skipZeroQuantityResourceNames sets.String podFilter func(pod *v1.Pod) (bool, error) @@ -65,18 +66,20 @@ type ResourcesEvictionPlugin struct { func NewResourcesEvictionPlugin(pluginName string, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, resourcesGetter ResourcesGetter, thresholdGetter ThresholdGetter, - gracePeriodGetter GracePeriodGetter, skipZeroQuantityResourceNames sets.String, + deletionGracePeriodGetter GracePeriodGetter, thresholdMetToleranceDurationGetter GracePeriodGetter, + skipZeroQuantityResourceNames sets.String, podFilter func(pod *v1.Pod) (bool, error)) *ResourcesEvictionPlugin { // use the given threshold to override the default configurations plugin := &ResourcesEvictionPlugin{ - pluginName: pluginName, - emitter: emitter, - metaServer: metaServer, - resourcesGetter: resourcesGetter, - thresholdGetter: thresholdGetter, - gracePeriodGetter: gracePeriodGetter, - 
skipZeroQuantityResourceNames: skipZeroQuantityResourceNames, - podFilter: podFilter, + pluginName: pluginName, + emitter: emitter, + metaServer: metaServer, + resourcesGetter: resourcesGetter, + thresholdGetter: thresholdGetter, + deletionGracePeriodGetter: deletionGracePeriodGetter, + thresholdMetToleranceDurationGetter: thresholdMetToleranceDurationGetter, + skipZeroQuantityResourceNames: skipZeroQuantityResourceNames, + podFilter: podFilter, } return plugin } @@ -190,12 +193,12 @@ func (b *ResourcesEvictionPlugin) ThresholdMet(ctx context.Context) (*pluginapi. resourceName, total, used, *thresholdRate, thresholdValue) return &pluginapi.ThresholdMetResponse{ - ThresholdValue: thresholdValue, - ObservedValue: used, - ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN, - MetType: pluginapi.ThresholdMetType_HARD_MET, - EvictionScope: string(resourceName), - // not setting grace period for threshold, make it be handled immediately + ThresholdValue: thresholdValue, + ObservedValue: used, + ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN, + MetType: pluginapi.ThresholdMetType_HARD_MET, + EvictionScope: string(resourceName), + GracePeriodSeconds: b.thresholdMetToleranceDurationGetter(), }, nil } } @@ -233,7 +236,7 @@ func (b *ResourcesEvictionPlugin) GetTopEvictionPods(_ context.Context, request retLen := general.MinUInt64(request.TopN, uint64(len(activeFilteredPods))) var deletionOptions *pluginapi.DeletionOptions - if gracePeriod := b.gracePeriodGetter(); gracePeriod > 0 { + if gracePeriod := b.deletionGracePeriodGetter(); gracePeriod > 0 { deletionOptions = &pluginapi.DeletionOptions{ GracePeriodSeconds: gracePeriod, } diff --git a/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go b/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go index 1ba19384ca..c1ac410f55 100644 --- a/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go +++ 
b/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go @@ -22,8 +22,9 @@ import ( ) type ReclaimedResourcesEvictionConfiguration struct { - EvictionThreshold native.ResourceThreshold - GracePeriod int64 + EvictionThreshold native.ResourceThreshold + DeletionGracePeriod int64 + ThresholdMetToleranceDuration int64 } func NewReclaimedResourcesEvictionConfiguration() *ReclaimedResourcesEvictionConfiguration { @@ -41,7 +42,11 @@ func (c *ReclaimedResourcesEvictionConfiguration) ApplyConfiguration(conf *crd.D } if config.GracePeriod != nil { - c.GracePeriod = *config.GracePeriod + c.DeletionGracePeriod = *config.GracePeriod + } + + if config.ThresholdMetToleranceDuration != nil { + c.ThresholdMetToleranceDuration = *config.ThresholdMetToleranceDuration } } } diff --git a/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go b/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go index d088ffa81d..e8c0269cc0 100644 --- a/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go +++ b/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go @@ -172,7 +172,7 @@ func (t *CNRTaintHelper) TryUNTaintCNR(name string) error { } if equality.Semantic.DeepEqual(cnr, newCNR) { - general.Infof("taint already disappears, not need to update") + klog.V(5).Infof("taint already disappears, not need to update") return nil }