Skip to content

Commit

Permalink
refactor(eviction): support config eviction tolerance duration
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <linzhecheng@bytedance.com>
  • Loading branch information
cheney-lin committed Jan 19, 2024
1 parent f0e37c9 commit 81daf37
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ import (
)

// ReclaimedResourcesEvictionOptions holds the command-line options for
// reclaimed-resources eviction.
//
// EvictionThreshold is the per-resource threshold rate, GracePeriod is the
// graceful eviction period (in seconds) for reclaimed pods, and
// ThresholdMetToleranceDuration is the tolerance duration before eviction
// (presumably also in seconds -- confirm against the eviction manager).
type ReclaimedResourcesEvictionOptions struct {
EvictionThreshold native.ResourceThreshold
GracePeriod int64
EvictionThreshold native.ResourceThreshold
GracePeriod int64
ThresholdMetToleranceDuration int64
}

func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions {
Expand All @@ -35,7 +36,8 @@ func NewReclaimedResourcesEvictionOptions() *ReclaimedResourcesEvictionOptions {
consts.ReclaimedResourceMilliCPU: 5.0,
consts.ReclaimedResourceMemory: 5.0,
},
GracePeriod: 60,
GracePeriod: 60,
ThresholdMetToleranceDuration: 0,
}
}

Expand All @@ -46,10 +48,13 @@ func (o *ReclaimedResourcesEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets)
"the threshold rate for best effort resources")
fs.Int64Var(&o.GracePeriod, "eviction-reclaimed-resources-grace-period", o.GracePeriod,
"the graceful eviction period (in seconds) for reclaimed pods")
fs.Int64Var(&o.ThresholdMetToleranceDuration, "eviction-reclaimed-resources-threshold-met-tolerance-duration",
o.ThresholdMetToleranceDuration, "the tolerance duration before eviction.")
}

// ApplyTo copies the option values into the given reclaimed-resources
// eviction configuration. The GracePeriod option is stored as the
// configuration's DeletionGracePeriod. It always returns nil.
func (o *ReclaimedResourcesEvictionOptions) ApplyTo(c *eviction.ReclaimedResourcesEvictionConfiguration) error {
c.EvictionThreshold = o.EvictionThreshold
c.GracePeriod = o.GracePeriod
c.DeletionGracePeriod = o.GracePeriod
c.ThresholdMetToleranceDuration = o.ThresholdMetToleranceDuration
return nil
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ require (
github.com/gogo/protobuf v1.3.2
github.com/golang/protobuf v1.5.2
github.com/google/cadvisor v0.44.2
github.com/kubewharf/katalyst-api v0.4.0
github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c
github.com/montanaflynn/stats v0.7.1
github.com/opencontainers/runc v1.1.6
github.com/opencontainers/selinux v1.10.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -546,8 +546,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.4.0 h1:IvAjHiPNzsPZSNZxXHUZgUZW3H32/V6HgDzhmgTklYE=
github.com/kubewharf/katalyst-api v0.4.0/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo=
github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c h1:z8aEJTcfu8mv7AuVAQK+uVMjUcR1eSeE1ELoA9hmKGw=
github.com/kubewharf/katalyst-api v0.4.1-0.20240118125832-edab146c0c0c/go.mod h1:YJJwV7ucsCM9np4KR3FXQTpNVlQS4ceaMl5lxIXXZfo=
github.com/kubewharf/kubelet v1.24.6-kubewharf.8 h1:2e89T/nZTgzaVhyRsZuwEdRk8V8kJXs4PRkgfeG4Ai4=
github.com/kubewharf/kubelet v1.24.6-kubewharf.8/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
Expand Down
10 changes: 7 additions & 3 deletions pkg/agent/evictionmanager/plugin/resource/reclaimed_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,11 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev
}
}

gracePeriodGetter := func() int64 {
return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.GracePeriod
deletionGracePeriodGetter := func() int64 {
return conf.GetDynamicConfiguration().ReclaimedResourcesEvictionConfiguration.DeletionGracePeriod
}
thresholdMetToleranceDuration := func() int64 {
return conf.GetDynamicConfiguration().ThresholdMetToleranceDuration
}

p := NewResourcesEvictionPlugin(
Expand All @@ -71,7 +74,8 @@ func NewReclaimedResourcesEvictionPlugin(_ *client.GenericClientSet, _ events.Ev
emitter,
reclaimedResourcesGetter,
reclaimedThresholdGetter,
gracePeriodGetter,
deletionGracePeriodGetter,
thresholdMetToleranceDuration,
conf.SkipZeroQuantityResourceNames,
conf.CheckReclaimedQoSForPod,
)
Expand Down
41 changes: 22 additions & 19 deletions pkg/agent/evictionmanager/plugin/resource/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ type ResourcesEvictionPlugin struct {
emitter metrics.MetricEmitter

// thresholdGetter is used to get the threshold of resources.
thresholdGetter ThresholdGetter
resourcesGetter ResourcesGetter
gracePeriodGetter GracePeriodGetter
thresholdGetter ThresholdGetter
resourcesGetter ResourcesGetter
deletionGracePeriodGetter GracePeriodGetter
thresholdMetToleranceDurationGetter GracePeriodGetter

skipZeroQuantityResourceNames sets.String
podFilter func(pod *v1.Pod) (bool, error)
Expand All @@ -65,18 +66,20 @@ type ResourcesEvictionPlugin struct {

// NewResourcesEvictionPlugin builds a ResourcesEvictionPlugin that stores the
// given plugin name, meta server, metric emitter, getters, skip list and pod
// filter without further processing.
//
// deletionGracePeriodGetter supplies the grace period placed in the deletion
// options of pods selected for eviction; thresholdMetToleranceDurationGetter
// supplies the GracePeriodSeconds reported when a threshold is met.
func NewResourcesEvictionPlugin(pluginName string, metaServer *metaserver.MetaServer,
emitter metrics.MetricEmitter, resourcesGetter ResourcesGetter, thresholdGetter ThresholdGetter,
gracePeriodGetter GracePeriodGetter, skipZeroQuantityResourceNames sets.String,
deletionGracePeriodGetter GracePeriodGetter, thresholdMetToleranceDurationGetter GracePeriodGetter,
skipZeroQuantityResourceNames sets.String,
podFilter func(pod *v1.Pod) (bool, error)) *ResourcesEvictionPlugin {
// use the given threshold to override the default configurations
plugin := &ResourcesEvictionPlugin{
pluginName: pluginName,
emitter: emitter,
metaServer: metaServer,
resourcesGetter: resourcesGetter,
thresholdGetter: thresholdGetter,
gracePeriodGetter: gracePeriodGetter,
skipZeroQuantityResourceNames: skipZeroQuantityResourceNames,
podFilter: podFilter,
pluginName: pluginName,
emitter: emitter,
metaServer: metaServer,
resourcesGetter: resourcesGetter,
thresholdGetter: thresholdGetter,
deletionGracePeriodGetter: deletionGracePeriodGetter,
thresholdMetToleranceDurationGetter: thresholdMetToleranceDurationGetter,
skipZeroQuantityResourceNames: skipZeroQuantityResourceNames,
podFilter: podFilter,
}
return plugin
}
Expand Down Expand Up @@ -190,12 +193,12 @@ func (b *ResourcesEvictionPlugin) ThresholdMet(ctx context.Context) (*pluginapi.
resourceName, total, used, *thresholdRate, thresholdValue)

return &pluginapi.ThresholdMetResponse{
ThresholdValue: thresholdValue,
ObservedValue: used,
ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN,
MetType: pluginapi.ThresholdMetType_HARD_MET,
EvictionScope: string(resourceName),
// not setting grace period for threshold, make it be handled immediately
ThresholdValue: thresholdValue,
ObservedValue: used,
ThresholdOperator: pluginapi.ThresholdOperator_GREATER_THAN,
MetType: pluginapi.ThresholdMetType_HARD_MET,
EvictionScope: string(resourceName),
GracePeriodSeconds: b.thresholdMetToleranceDurationGetter(),
}, nil
}
}
Expand Down Expand Up @@ -233,7 +236,7 @@ func (b *ResourcesEvictionPlugin) GetTopEvictionPods(_ context.Context, request
retLen := general.MinUInt64(request.TopN, uint64(len(activeFilteredPods)))

var deletionOptions *pluginapi.DeletionOptions
if gracePeriod := b.gracePeriodGetter(); gracePeriod > 0 {
if gracePeriod := b.deletionGracePeriodGetter(); gracePeriod > 0 {
deletionOptions = &pluginapi.DeletionOptions{
GracePeriodSeconds: gracePeriod,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ import (
)

// ReclaimedResourcesEvictionConfiguration holds the settings for
// reclaimed-resources eviction: the per-resource eviction threshold, the
// grace period applied when deleting evicted pods, and the tolerance duration
// applied once a threshold is met.
type ReclaimedResourcesEvictionConfiguration struct {
EvictionThreshold native.ResourceThreshold
GracePeriod int64
EvictionThreshold native.ResourceThreshold
DeletionGracePeriod int64
ThresholdMetToleranceDuration int64
}

func NewReclaimedResourcesEvictionConfiguration() *ReclaimedResourcesEvictionConfiguration {
Expand All @@ -41,7 +42,11 @@ func (c *ReclaimedResourcesEvictionConfiguration) ApplyConfiguration(conf *crd.D
}

if config.GracePeriod != nil {
c.GracePeriod = *config.GracePeriod
c.DeletionGracePeriod = *config.GracePeriod
}

if config.ThresholdMetToleranceDuration != nil {
c.ThresholdMetToleranceDuration = *config.ThresholdMetToleranceDuration
}
}
}
2 changes: 1 addition & 1 deletion pkg/controller/lifecycle/agent-healthz/helper/taint_cnr.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func (t *CNRTaintHelper) TryUNTaintCNR(name string) error {
}

if equality.Semantic.DeepEqual(cnr, newCNR) {
general.Infof("taint already disappears, not need to update")
klog.V(5).Infof("taint already disappears, not need to update")
return nil
}

Expand Down

0 comments on commit 81daf37

Please sign in to comment.