Skip to content

Commit

Permalink
feat(eviction): add eviction pod metric labels
Browse files Browse the repository at this point in the history
  • Loading branch information
nightmeng authored and waynepeking348 committed Mar 26, 2024
1 parent 58177dd commit 569d59f
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 13 deletions.
7 changes: 7 additions & 0 deletions cmd/katalyst-agent/app/options/eviction/eviction_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ type GenericEvictionOptions struct {

// StrictAuthentication means whether to authenticate plugins strictly
StrictAuthentication bool

// PodMetricLabels lists the pod label keys whose values are attached as extra tags on eviction victim-pod metrics.
PodMetricLabels []string
}

// NewGenericEvictionOptions creates a new Options with a default config.
Expand Down Expand Up @@ -93,6 +96,9 @@ func (o *GenericEvictionOptions) AddFlags(fss *cliflag.NamedFlagSets) {
fs.BoolVar(&o.StrictAuthentication, "strict-authentication", o.StrictAuthentication,
"whether to authenticate plugins strictly, the out-of-tree plugins must use valid and authorized token "+
"to register if it set to true")

fs.StringSliceVar(&o.PodMetricLabels, "eviction-pod-metric-labels", o.PodMetricLabels,
"The pod labels to be added into metric selector list")
}

// ApplyTo fills up config with options
Expand All @@ -105,6 +111,7 @@ func (o *GenericEvictionOptions) ApplyTo(c *evictionconfig.GenericEvictionConfig
c.EvictionBurst = o.EvictionBurst
c.PodKiller = o.PodKiller
c.StrictAuthentication = o.StrictAuthentication
c.PodMetricLabels.Insert(o.PodMetricLabels...)
return nil
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/agent/evictionmanager/eviction_resp_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (e *evictionRespCollector) collectEvictPods(dryRunPlugins []string, pluginN
e.getLogPrefix(dryRun), pluginName, evictPod.Pod.Namespace, evictPod.Pod.Name, evictPod.Reason, evictPod.ForceEvict)

if dryRun {
metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, evictPod.Pod, dryRun)
metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, evictPod.Pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels)
} else {
evictPods = append(evictPods, resp.EvictPods[i])
}
Expand Down Expand Up @@ -171,7 +171,7 @@ func (e *evictionRespCollector) collectTopEvictionPods(dryRunPlugins []string, p
general.Infof("%v plugin %v request to evict topN pod %v/%v, reason: met threshold in scope [%v]",
e.getLogPrefix(dryRun), pluginName, pod.Namespace, pod.Name, threshold.EvictionScope)
if dryRun {
metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, pod, dryRun)
metricsPodToEvict(e.emitter, e.conf.GenericConfiguration.QoSConfiguration, pluginName, pod, dryRun, e.conf.GenericEvictionConfiguration.PodMetricLabels)
} else {
targetPods = append(targetPods, resp.TargetPods[i])
}
Expand Down
43 changes: 32 additions & 11 deletions pkg/agent/evictionmanager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"context"
"fmt"
"strconv"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -69,6 +70,8 @@ const (
ValidateFailedReasonNoPermission = "no_permission"

UserUnknown = "unknown"

MetricsPodLabelPrefix = "pod"
)

// LatestCNRGetter returns the latest CNR resources.
Expand Down Expand Up @@ -349,7 +352,7 @@ func (m *EvictionManger) doEvict(softEvictPods, forceEvictPods map[string]*rule.
general.Infof(" evict %d pods in evictionmanager", len(rpList))
_ = m.emitter.StoreInt64(MetricsNameVictimPodCNT, int64(len(rpList)), metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "type", Val: "total"})
metricPodsToEvict(m.emitter, rpList, m.conf.GenericConfiguration.QoSConfiguration)
metricPodsToEvict(m.emitter, rpList, m.conf.GenericConfiguration.QoSConfiguration, m.conf.GenericEvictionConfiguration.PodMetricLabels)
}

// ValidatePlugin validates a plugin if the version is correct and the name has the format of an extended resource
Expand Down Expand Up @@ -609,20 +612,20 @@ func logConfirmedThresholdMet(thresholds map[string]*pluginapi.ThresholdMetRespo
}
}

// metricPodsToEvict reports one victim-pod metric for each entry of rpList by
// delegating to metricsPodToEvict, always with dryRun set to false.
//
// If emitter is nil, an error is logged and nothing is emitted. Entries that
// are nil or that carry an empty EvictionPluginName are skipped. qosConfig and
// podMetricLabels are handed to metricsPodToEvict unchanged.
func metricPodsToEvict(emitter metrics.MetricEmitter, rpList rule.RuledEvictPodList, qosConfig *generic.QoSConfiguration) {
func metricPodsToEvict(emitter metrics.MetricEmitter, rpList rule.RuledEvictPodList, qosConfig *generic.QoSConfiguration, podMetricLabels sets.String) {
// Without an emitter there is nowhere to send metrics; log and bail out.
if emitter == nil {
general.Errorf(" metricPodsToEvict got nil emitter")
return
}

for _, rp := range rpList {
// Only entries attributed to a named plugin are reported.
if rp != nil && rp.EvictionPluginName != "" {
metricsPodToEvict(emitter, qosConfig, rp.EvictionPluginName, rp.Pod, false)
metricsPodToEvict(emitter, qosConfig, rp.EvictionPluginName, rp.Pod, false, podMetricLabels)
}
}
}

func metricsPodToEvict(emitter metrics.MetricEmitter, qosConfig *generic.QoSConfiguration, pluginName string, pod *v1.Pod, dryRun bool) {
func metricsPodToEvict(emitter metrics.MetricEmitter, qosConfig *generic.QoSConfiguration, pluginName string, pod *v1.Pod, dryRun bool, podMetricLabels sets.String) {
podQosLevel := "unknown"
if qosConfig != nil {
qosLevel, err := qosConfig.GetQoSLevelForPod(pod)
Expand All @@ -634,11 +637,29 @@ func metricsPodToEvict(emitter metrics.MetricEmitter, qosConfig *generic.QoSConf
if dryRun {
metricKey = MetricsNameDryRunVictimPodCNT
}
_ = emitter.StoreInt64(metricKey, 1, metrics.MetricTypeNameRaw,
metrics.MetricTag{Key: "name", Val: pluginName},
metrics.MetricTag{Key: "type", Val: "plugin"},
metrics.MetricTag{Key: "victim_ns", Val: pod.Namespace},
metrics.MetricTag{Key: "victim_name", Val: pod.Name},
metrics.MetricTag{Key: "qos", Val: podQosLevel},
)

metricTags := []metrics.MetricTag{
{Key: "name", Val: pluginName},
{Key: "type", Val: "plugin"},
{Key: "victim_ns", Val: pod.Namespace},
{Key: "victim_name", Val: pod.Name},
{Key: "qos", Val: podQosLevel},
}
if pod.Labels != nil {
for _, metricLabel := range podMetricLabels.List() {
metricValue, ok := pod.Labels[metricLabel]
if ok {
metricTags = append(metricTags, metrics.MetricTag{
Key: genPodLabelMetricKey(metricLabel),
Val: metricValue,
})
}
}
}
_ = emitter.StoreInt64(metricKey, 1, metrics.MetricTypeNameRaw, metricTags...)
}

// genPodLabelMetricKey converts a pod label key into the metric tag key under
// which that label's value is reported.
//
// The returned key is MetricsPodLabelPrefix, an underscore, and the label key
// with every character outside [A-Za-z0-9_] replaced by "_". Kubernetes label
// keys may contain "-", "." and "/" (for example "app.kubernetes.io/name");
// replacing only "-" would let the other separators through into the tag key,
// which metric backends with identifier-style tag names (e.g. Prometheus label
// names) do not accept. Keys made solely of letters, digits, "-" and "_"
// produce the same result as before.
func genPodLabelMetricKey(key string) string {
	sanitized := strings.Map(func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z',
			r >= 'A' && r <= 'Z',
			r >= '0' && r <= '9',
			r == '_':
			return r
		default:
			// Any separator or otherwise unsupported character collapses to "_".
			return '_'
		}
	}, key)
	return MetricsPodLabelPrefix + "_" + sanitized
}
4 changes: 4 additions & 0 deletions pkg/config/agent/eviction/eviciton_base.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ type GenericEvictionConfiguration struct {

// StrictAuthentication means whether to authenticate plugins strictly
StrictAuthentication bool

// PodMetricLabels is the set of pod label keys whose values are attached as extra tags on eviction victim-pod metrics.
PodMetricLabels sets.String
}

type EvictionConfiguration struct {
Expand All @@ -60,6 +63,7 @@ func NewGenericEvictionConfiguration() *GenericEvictionConfiguration {
return &GenericEvictionConfiguration{
EvictionSkippedAnnotationKeys: sets.NewString(),
EvictionSkippedLabelKeys: sets.NewString(),
PodMetricLabels: sets.NewString(),
}
}

Expand Down

0 comments on commit 569d59f

Please sign in to comment.