Skip to content

Commit

Permalink
installer: add install time duration metric
Browse files Browse the repository at this point in the history
Signed-off-by: Damien Grisonnet <dgrisonn@redhat.com>
  • Loading branch information
dgrisonnet committed Oct 8, 2024
1 parent 756adf2 commit 3a2c652
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/openshift/library-go/pkg/operator/management"
"github.com/openshift/library-go/pkg/operator/resource/resourceapply"
"github.com/openshift/library-go/pkg/operator/resource/resourceread"
"github.com/openshift/library-go/pkg/operator/staticpod/controller/installer/metrics"
"github.com/openshift/library-go/pkg/operator/staticpod/controller/revision"
"github.com/openshift/library-go/pkg/operator/staticpod/startupmonitor/annotations"
"github.com/openshift/library-go/pkg/operator/v1helpers"
Expand All @@ -31,6 +32,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
corev1client "k8s.io/client-go/kubernetes/typed/core/v1"
basemetrics "k8s.io/component-base/metrics"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
)
Expand Down Expand Up @@ -98,6 +100,9 @@ type InstallerController struct {
clock clock.Clock
installerBackOff func(count int) time.Duration
fallbackBackOff func(count int) time.Duration

metrics *metrics.Metrics
rolloutStartTime time.Time
}

// InstallerPodMutationFunc is a function that has a chance at changing the installer pod before it is created
Expand Down Expand Up @@ -186,6 +191,8 @@ func NewInstallerController(
clock: clock.RealClock{},
installerBackOff: backOffDuration(10*time.Second, 1.5, 10*time.Minute),
fallbackBackOff: backOffDuration(10*time.Minute, 2, 2*time.Hour), // 10min, 20min, 40min, 1h20, 2h

metrics: metrics.New(),
}

c.ownerRefsFn = c.setOwnerRefs
Expand All @@ -212,6 +219,10 @@ func (c InstallerController) Name() string {
return "InstallerController"
}

// RegisterMetrics registers the installer controller's metrics by handing them
// to registrationFunc, and returns whatever error the registration produces.
func (c *InstallerController) RegisterMetrics(registrationFunc func(basemetrics.Registerable) error) error {
	installerMetrics := c.metrics
	return installerMetrics.Register(registrationFunc)
}

// getStaticPodState returns
// - the state of the static pod,
// - its revision (in case of the fallback static pod the revision of the non-fallback one),
Expand Down Expand Up @@ -514,6 +525,9 @@ func (c *InstallerController) manageInstallationPods(ctx context.Context, operat
} else if updated && currNodeState.CurrentRevision != newCurrNodeState.CurrentRevision {
c.eventRecorder.Eventf("NodeCurrentRevisionChanged", "Updated node %q from revision %d to %d because %s", currNodeState.NodeName,
currNodeState.CurrentRevision, newCurrNodeState.CurrentRevision, reason)
klog.Infof("Recording static pod rollout that lasted %f", time.Since(c.rolloutStartTime).Seconds())
c.metrics.ObserveStaticPodRollout(c.staticPodName, time.Since(c.rolloutStartTime).Seconds())
c.rolloutStartTime = time.Time{}
}

return false, 0, nil // no requeue because UpdateStaticPodStatus triggers an external event anyway
Expand Down Expand Up @@ -936,6 +950,11 @@ func (c *InstallerController) ensureInstallerPod(ctx context.Context, operatorSp
}

_, _, err = resourceapply.ApplyPod(ctx, c.podsGetter, c.eventRecorder, pod)

if c.rolloutStartTime.IsZero() {
c.rolloutStartTime = time.Now()
}

return err
}

Expand Down
38 changes: 38 additions & 0 deletions pkg/operator/staticpod/controller/installer/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package metrics

import (
"k8s.io/component-base/metrics"
)

// Naming components shared by the metrics defined in this package; New passes
// them as the Namespace and Subsystem of the rollout-duration histogram options.
const (
namespace = "installer_controller"
subsystem = "static_pod"
)

// Metrics groups the metrics recorded by the installer controller.
// Build it with New and expose it through Register.
type Metrics struct {
// staticPodRolloutDuration is a histogram of static pod rollout durations,
// partitioned by the "pod" label.
staticPodRolloutDuration *metrics.HistogramVec
}

// New builds a Metrics with its rollout-duration histogram created but not
// registered; call Register to hand the histogram to a registry.
func New() *Metrics {
	// Power-of-two upper bounds from 1 to 8192; the help text states the
	// observed unit is seconds.
	rolloutBuckets := []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192}

	// NOTE(review): Prometheus naming conventions suggest a unit suffix such as
	// "_seconds" on the metric name; the name is left untouched here because
	// renaming would change the exposed series.
	rolloutOpts := &metrics.HistogramOpts{
		Namespace:      namespace,
		Subsystem:      subsystem,
		Name:           "rollout_duration",
		Help:           "Duration of static pod rollouts broken down by pod name in seconds.",
		Buckets:        rolloutBuckets,
		StabilityLevel: metrics.ALPHA,
	}

	// A single label, "pod", distinguishes the static pods being rolled out.
	rolloutDuration := metrics.NewHistogramVec(rolloutOpts, []string{"pod"})

	return &Metrics{staticPodRolloutDuration: rolloutDuration}
}

// Register passes the rollout-duration histogram to registrationFunc and
// returns the error, if any, that registrationFunc reports.
func (m *Metrics) Register(registrationFunc func(metrics.Registerable) error) error {
	rolloutHistogram := m.staticPodRolloutDuration
	return registrationFunc(rolloutHistogram)
}

// ObserveStaticPodRollout records one rollout of the static pod named pod.
// duration is the value added to the histogram; the metric's help text
// declares it to be in seconds.
func (m *Metrics) ObserveStaticPodRollout(pod string, duration float64) {
	podObserver := m.staticPodRolloutDuration.WithLabelValues(pod)
	podObserver.Observe(duration)
}
14 changes: 12 additions & 2 deletions pkg/operator/staticpod/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/client-go/kubernetes"
"k8s.io/component-base/metrics/legacyregistry"
)

type staticPodOperatorControllerBuilder struct {
Expand Down Expand Up @@ -247,7 +248,7 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller
}

if len(b.installCommand) > 0 {
manager.WithController(installer.NewInstallerController(
installerController := installer.NewInstallerController(
b.operandNamespace,
b.staticPodName,
b.revisionConfigMaps,
Expand All @@ -267,7 +268,16 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller
b.installerPodMutationFunc,
).WithMinReadyDuration(
b.minReadyDuration,
), 1)
)

// TODO: register the metrics more cleanly at the operator level and
// decouple it from the legacy registry
err := installerController.RegisterMetrics(legacyregistry.Register)
if err != nil {
errs = append(errs, fmt.Errorf("failed to register installer controller metrics: %v", err))
}

manager.WithController(installerController, 1)

manager.WithController(installerstate.NewInstallerStateController(
b.operandName,
Expand Down

0 comments on commit 3a2c652

Please sign in to comment.