From 6ad7b44da0445d91178940a13e32e8c84fe0481a Mon Sep 17 00:00:00 2001
From: Damien Grisonnet
Date: Thu, 3 Oct 2024 17:54:53 +0200
Subject: [PATCH] installer: add install time duration metric

Record how long a static pod rollout takes on a node, measured from the
first time the installer pod is applied until the node's current revision
changes. The duration is observed in seconds in the
installer_controller_static_pod_rollout_duration_seconds histogram, which
is labelled by static pod name.

The measurement uses the controller's injected clock and is skipped when
this controller instance did not apply the installer pod itself (for
example after a restart), because no start time is known in that case.

Signed-off-by: Damien Grisonnet
---
 .../installer/installer_controller.go         | 30 +++++++++++
 .../controller/installer/metrics/metrics.go   | 52 +++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 pkg/operator/staticpod/controller/installer/metrics/metrics.go

diff --git a/pkg/operator/staticpod/controller/installer/installer_controller.go b/pkg/operator/staticpod/controller/installer/installer_controller.go
index 418f652fb1..22e7c1ea39 100644
--- a/pkg/operator/staticpod/controller/installer/installer_controller.go
+++ b/pkg/operator/staticpod/controller/installer/installer_controller.go
@@ -20,6 +20,7 @@ import (
 	"github.com/openshift/library-go/pkg/operator/management"
 	"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
 	"github.com/openshift/library-go/pkg/operator/resource/resourceread"
+	"github.com/openshift/library-go/pkg/operator/staticpod/controller/installer/metrics"
 	"github.com/openshift/library-go/pkg/operator/staticpod/controller/revision"
 	"github.com/openshift/library-go/pkg/operator/staticpod/startupmonitor/annotations"
 	"github.com/openshift/library-go/pkg/operator/v1helpers"
@@ -31,6 +32,7 @@ import (
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/client-go/informers"
 	corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
+	basemetrics "k8s.io/component-base/metrics"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/clock"
 )
@@ -98,6 +100,12 @@ type InstallerController struct {
 	clock            clock.Clock
 	installerBackOff func(count int) time.Duration
 	fallbackBackOff  func(count int) time.Duration
+
+	// metrics holds the collectors this controller records to.
+	metrics *metrics.Metrics
+	// rolloutStartTime is when the installer pod of the rollout being timed was
+	// first applied; the zero value means no rollout is being timed.
+	rolloutStartTime time.Time
 }
 
 // InstallerPodMutationFunc is a function that has a chance at changing the installer pod before it is created
@@ -186,6 +194,8 @@ func NewInstallerController(
 		clock:            clock.RealClock{},
 		installerBackOff: backOffDuration(10*time.Second, 1.5, 10*time.Minute),
 		fallbackBackOff:  backOffDuration(10*time.Minute, 2, 2*time.Hour), // 10min, 20min, 40min, 1h20, 2h
+
+		metrics: metrics.New(),
 	}
 
 	c.ownerRefsFn = c.setOwnerRefs
@@ -212,6 +222,12 @@ func (c InstallerController) Name() string {
 	return "InstallerController"
 }
 
+// RegisterMetrics registers the controller's metrics through registrationFunc
+// and returns the error it reports, if any.
+func (c *InstallerController) RegisterMetrics(registrationFunc func(basemetrics.Registerable) error) error {
+	return c.metrics.Register(registrationFunc)
+}
+
 // getStaticPodState returns
 // - the state of the static pod,
 // - its revision (in case of the fallback static pod the revision of the non-fallback one),
@@ -514,6 +530,13 @@ func (c *InstallerController) manageInstallationPods(ctx context.Context, operat
 			} else if updated && currNodeState.CurrentRevision != newCurrNodeState.CurrentRevision {
 				c.eventRecorder.Eventf("NodeCurrentRevisionChanged", "Updated node %q from revision %d to %d because %s", currNodeState.NodeName,
 					currNodeState.CurrentRevision, newCurrNodeState.CurrentRevision, reason)
+				// The start time is zero when this controller instance never applied
+				// the installer pod (e.g. it restarted mid-rollout); skip the
+				// observation then instead of recording a meaningless duration.
+				if !c.rolloutStartTime.IsZero() {
+					c.metrics.ObserveStaticPodRollout(c.staticPodName, c.clock.Since(c.rolloutStartTime).Seconds())
+					c.rolloutStartTime = time.Time{}
+				}
 			}
 
 			return false, 0, nil // no requeue because UpdateStaticPodStatus triggers an external event anyway
@@ -936,6 +959,13 @@ func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSp
 	}
 
 	_, _, err = resourceapply.ApplyPod(ctx, c.podsGetter, c.eventRecorder, pod)
+
+	// Remember when the rollout started; repeated calls for the same rollout
+	// must not move the start time forward.
+	if c.rolloutStartTime.IsZero() {
+		c.rolloutStartTime = c.clock.Now()
+	}
+
 	return err
 }
 
diff --git a/pkg/operator/staticpod/controller/installer/metrics/metrics.go b/pkg/operator/staticpod/controller/installer/metrics/metrics.go
new file mode 100644
index 0000000000..9ee06d18f5
--- /dev/null
+++ b/pkg/operator/staticpod/controller/installer/metrics/metrics.go
@@ -0,0 +1,52 @@
+// Package metrics defines the metrics recorded by the static pod installer
+// controller.
+package metrics
+
+import (
+	"k8s.io/component-base/metrics"
+)
+
+const (
+	namespace = "installer_controller"
+	subsystem = "static_pod"
+)
+
+// rolloutDurationBuckets spans 15s to 64min in powers of two.
+// metrics.DefBuckets tops out at 10s, which would presumably put nearly
+// every rollout in the +Inf bucket.
+var rolloutDurationBuckets = metrics.ExponentialBuckets(15, 2, 9)
+
+// Metrics holds the collectors recorded by the installer controller.
+type Metrics struct {
+	staticPodRolloutDuration *metrics.HistogramVec
+}
+
+// New returns a Metrics that has not been registered yet; use Register to
+// expose it.
+func New() *Metrics {
+	return &Metrics{
+		staticPodRolloutDuration: metrics.NewHistogramVec(
+			&metrics.HistogramOpts{
+				Namespace:      namespace,
+				Subsystem:      subsystem,
+				Name:           "rollout_duration_seconds",
+				Help:           "Duration in seconds of static pod rollouts broken down by pod name.",
+				Buckets:        rolloutDurationBuckets,
+				StabilityLevel: metrics.ALPHA,
+			},
+			[]string{"pod"},
+		),
+	}
+}
+
+// Register hands every collector to registrationFunc and returns the error it
+// reports, if any.
+func (m *Metrics) Register(registrationFunc func(metrics.Registerable) error) error {
+	return registrationFunc(m.staticPodRolloutDuration)
+}
+
+// ObserveStaticPodRollout records that a rollout of the static pod named pod
+// took duration seconds.
+func (m *Metrics) ObserveStaticPodRollout(pod string, duration float64) {
+	m.staticPodRolloutDuration.WithLabelValues(pod).Observe(duration)
+}