diff --git a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go index bad84937b..e969c9374 100644 --- a/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go +++ b/cmd/katalyst-agent/app/options/dynamic/adminqos/eviction/reclaimed_resources_eviction.go @@ -25,8 +25,9 @@ import ( ) type ReclaimedResourcesEvictionOptions struct { - EvictionThreshold native.ResourceThreshold - GracePeriod int64 + EvictionThreshold native.ResourceThreshold + GracePeriod int64 + ThresholdMetToleranceDuration int64 } func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions { @@ -35,7 +36,8 @@ func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions { consts.ReclaimedResourceMilliCPU: 5.0, consts.ReclaimedResourceMemory: 5.0, }, - GracePeriod: 60, + GracePeriod: 60, + ThresholdMetToleranceDuration: 0, } } @@ -46,10 +48,13 @@ func (o *ReclaimedResourcesEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) "the threshold rate for best effort resources") fs.Int64Var(&o.GracePeriod, "eviction-reclaimed-resources-grace-period", o.GracePeriod, "the graceful eviction period (in seconds) for reclaimed pods") + fs.Int64Var(&o.ThresholdMetToleranceDuration, "eviction-reclaimed-resources-threshold-met-tolerance-duration", + o.ThresholdMetToleranceDuration, "the tolerance duration (in seconds) after the threshold is met before eviction") } func (o *ReclaimedResourcesEvictionOptions) ApplyTo(c *eviction.ReclaimedResourcesEvictionConfiguration) error { c.EvictionThreshold = o.EvictionThreshold - c.GracePeriod = o.GracePeriod + c.DeletionGracePeriod = o.GracePeriod + c.ThresholdMetToleranceDuration = o.ThresholdMetToleranceDuration return nil } diff --git a/go.mod b/go.mod index 7d11f436a..be22ff464 100644 --- a/go.mod +++ b/go.mod @@ -12,7 +12,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/golang/protobuf v1.5.2 
github.com/google/cadvisor v0.44.2 - github.com/kubewharf/katalyst-api v0.4.0 + github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c github.com/montanaflynn/stats v0.7.1 github.com/opencontainers/runc v1.1.6 github.com/opencontainers/selinux v1.10.0 diff --git a/go.sum b/go.sum index 8f3ff800d..63406d883 100644 --- a/go.sum +++ b/go.sum @@ -546,8 +546,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kubewharf/katalyst-api v0.4.0 h1:IvAjHiPNzsPZSNZxXHUZgUZW3H32/V6HgDzhmgTklYE= -github.com/kubewharf/katalyst-api v0.4.0/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo= +github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c h1:z8aEJTcfu8mv7AuVAQK+uVMjUcR1eSeE1ELoA9hmKGw= +github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo= github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4= github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c= github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8= diff --git a/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go b/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go index 93afb5f80..797444991 100644 --- a/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go @@ -61,8 +61,11 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev } } - gracePeriodGetter := func() int64 { - return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.GracePeriod + 
deletionGracePeriodGetter := func() int64 { + return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.DeletionGracePeriod + } + thresholdMetToleranceDurationGetter := func() int64 { + return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.ThresholdMetToleranceDuration } p := NewResourcesEvictionPlugin( @@ -71,7 +74,8 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev emitter, reclaimedResourcesGetter, reclaimedThresholdGetter, - gracePeriodGetter, + deletionGracePeriodGetter, + thresholdMetToleranceDurationGetter, conf.SkipZeroQuantityResourceNames, conf.CheckReclaimedQoSForPod, ) diff --git a/pkg/agent/evictionmanager/plugin/resource/resources.go b/pkg/agent/evictionmanager/plugin/resource/resources.go index d7a23228f..e00715fb4 100644 --- a/pkg/agent/evictionmanager/plugin/resource/resources.go +++ b/pkg/agent/evictionmanager/plugin/resource/resources.go @@ -51,9 +51,10 @@ type ResourcesEvictionPlugin struct { emitter metrics.MetricEmitter // thresholdGetter is used to get the threshold of resources. 
- thresholdGetter ThresholdGetter - resourcesGetter ResourcesGetter - gracePeriodGetter GracePeriodGetter + thresholdGetter ThresholdGetter + resourcesGetter ResourcesGetter + deletionGracePeriodGetter GracePeriodGetter + thresholdMetToleranceDurationGetter GracePeriodGetter skipZeroQuantityResourceNames sets.String podFilter func(pod *v1.Pod) (bool, error) @@ -65,18 +66,20 @@ type ResourcesEvictionPlugin struct { func NewResourcesEvictionPlugin(pluginName string, metaServer *metaserver.MetaServer, emitter metrics.MetricEmitter, resourcesGetter ResourcesGetter, thresholdGetter ThresholdGetter, - gracePeriodGetter GracePeriodGetter, skipZeroQuantityResourceNames sets.String, + deletionGracePeriodGetter GracePeriodGetter, thresholdMetToleranceDurationGetter GracePeriodGetter, + skipZeroQuantityResourceNames sets.String, podFilter func(pod *v1.Pod) (bool, error)) *ResourcesEvictionPlugin { // use the given threshold to override the default configurations plugin := &ResourcesEvictionPlugin{ - pluginName: pluginName, - emitter: emitter, - metaServer: metaServer, - resourcesGetter: resourcesGetter, - thresholdGetter: thresholdGetter, - gracePeriodGetter: gracePeriodGetter, - skipZeroQuantityResourceNames: skipZeroQuantityResourceNames, - podFilter: podFilter, + pluginName: pluginName, + emitter: emitter, + metaServer: metaServer, + resourcesGetter: resourcesGetter, + thresholdGetter: thresholdGetter, + deletionGracePeriodGetter: deletionGracePeriodGetter, + thresholdMetToleranceDurationGetter: thresholdMetToleranceDurationGetter, + skipZeroQuantityResourceNames: skipZeroQuantityResourceNames, + podFilter: podFilter, } return plugin } @@ -190,12 +193,12 @@ func (b *ResourcesEvictionPlugin) ThresholdMet(ctx context.Context) (*pluginapi. 
resourceName, total, used, *thresholdRate, thresholdValue) return &pluginapi.ThresholdMetResponse{ - ThresholdValue: thresholdValue, - ObservedValue: used, - ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN, - MetType: pluginapi.ThresholdMetType_HARD_MET, - EvictionScope: string(resourceName), - // not setting grace period for threshold, make it be handled immediately + ThresholdValue: thresholdValue, + ObservedValue: used, + ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN, + MetType: pluginapi.ThresholdMetType_HARD_MET, + EvictionScope: string(resourceName), + GracePeriodSeconds: b.thresholdMetToleranceDurationGetter(), }, nil } } @@ -233,7 +236,7 @@ func (b *ResourcesEvictionPlugin) GetTopEvictionPods(_ context.Context, request retLen := general.MinUInt64(request.TopN, uint64(len(activeFilteredPods))) var deletionOptions *pluginapi.DeletionOptions - if gracePeriod := b.gracePeriodGetter(); gracePeriod > 0 { + if gracePeriod := b.deletionGracePeriodGetter(); gracePeriod > 0 { deletionOptions = &pluginapi.DeletionOptions{ GracePeriodSeconds: gracePeriod, } diff --git a/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go b/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go index 1ba19384c..c1ac410f5 100644 --- a/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go +++ b/pkg/config/agent/dynamic/adminqos/eviction/reclaimed_resources_eviction.go @@ -22,8 +22,9 @@ import ( ) type ReclaimedResourcesEvictionConfiguration struct { - EvictionThreshold native.ResourceThreshold - GracePeriod int64 + EvictionThreshold native.ResourceThreshold + DeletionGracePeriod int64 + ThresholdMetToleranceDuration int64 } func NewReclaimedResourcesEvictionConfiguration() *ReclaimedResourcesEvictionConfiguration { @@ -41,7 +42,11 @@ func (c *ReclaimedResourcesEvictionConfiguration) ApplyConfiguration(conf *crd.D } if config.GracePeriod != nil { - c.GracePeriod = *config.GracePeriod + 
c.DeletionGracePeriod = *config.GracePeriod + } + + if config.ThresholdMetToleranceDuration != nil { + c.ThresholdMetToleranceDuration = *config.ThresholdMetToleranceDuration } } } diff --git a/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go b/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go index d088ffa81..1f252edea 100644 --- a/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go +++ b/pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go @@ -172,7 +172,7 @@ func (t *CNRTaintHelper) TryUNTaintCNR(name string) error { } if equality.Semantic.DeepEqual(cnr, newCNR) { - general.Infof("taint already disappears, not need to update") + klog.V(5).InfoS("taint already disappears, not need to update", "cnr", cnr.Name) return nil }