From 011a62f08ae8d7438076d07e007f710415aff45e Mon Sep 17 00:00:00 2001 From: leandrorichardtoledo <169929015+leandrorichardtoledo@users.noreply.github.com> Date: Thu, 14 Nov 2024 17:34:53 -0300 Subject: [PATCH] PTEUDO-1658: Improving custom dbc metrics publishing (#359) --- cmd/main.go | 28 +++++++------ internal/metrics/databaseclaim.go | 66 ++++++++++++++++++++++++++++++ pkg/databaseclaim/databaseclaim.go | 38 +---------------- pkg/metrics/metrics.go | 17 +++----- 4 files changed, 88 insertions(+), 61 deletions(-) create mode 100644 internal/metrics/databaseclaim.go diff --git a/cmd/main.go b/cmd/main.go index 84b508d1..a4ce2be3 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -25,28 +25,26 @@ import ( // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. - _ "k8s.io/client-go/plugin/pkg/client/auth" - + persistanceinfobloxcomv1alpha1 "github.com/infobloxopen/db-controller/api/persistance.infoblox.com/v1alpha1" + persistancev1 "github.com/infobloxopen/db-controller/api/v1" + "github.com/infobloxopen/db-controller/internal/controller" + "github.com/infobloxopen/db-controller/internal/metrics" + mutating "github.com/infobloxopen/db-controller/internal/webhook" + "github.com/infobloxopen/db-controller/pkg/config" + "github.com/infobloxopen/db-controller/pkg/databaseclaim" + "github.com/infobloxopen/db-controller/pkg/rdsauth" + "github.com/infobloxopen/db-controller/pkg/roleclaim" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + _ "k8s.io/client-go/plugin/pkg/client/auth" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" - persistancev1 "github.com/infobloxopen/db-controller/api/v1" - 
"github.com/infobloxopen/db-controller/internal/controller" - mutating "github.com/infobloxopen/db-controller/internal/webhook" - "github.com/infobloxopen/db-controller/pkg/config" - "github.com/infobloxopen/db-controller/pkg/databaseclaim" - "github.com/infobloxopen/db-controller/pkg/rdsauth" - "github.com/infobloxopen/db-controller/pkg/roleclaim" - - persistanceinfobloxcomv1alpha1 "github.com/infobloxopen/db-controller/api/persistance.infoblox.com/v1alpha1" // +kubebuilder:scaffold:imports - crossplanerdsv1alpha1 "github.com/crossplane-contrib/provider-aws/apis/rds/v1alpha1" crossplanegcpv1beta2 "github.com/upbound/provider-gcp/apis/alloydb/v1beta2" ) @@ -268,8 +266,12 @@ func main() { } } + setupLog.Info("starting metrics updater") + ctx := ctrl.SetupSignalHandler() + go metrics.StartUpdater(ctx, mgr.GetClient()) + setupLog.Info("starting manager") - if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { + if err := mgr.Start(ctx); err != nil { setupLog.Error(err, "problem running manager") os.Exit(1) } diff --git a/internal/metrics/databaseclaim.go b/internal/metrics/databaseclaim.go new file mode 100644 index 00000000..b6ef232f --- /dev/null +++ b/internal/metrics/databaseclaim.go @@ -0,0 +1,66 @@ +package metrics + +import ( + "context" + "time" + + "github.com/go-logr/logr" + v1 "github.com/infobloxopen/db-controller/api/v1" + "github.com/infobloxopen/db-controller/pkg/metrics" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// StartUpdater starts a metrics updater that updates the metrics every minute. 
+func StartUpdater(ctx context.Context, client client.Client) {
+	ticker := time.NewTicker(1 * time.Minute)
+	defer ticker.Stop()
+
+	logr := log.FromContext(ctx).WithName("metrics-updater")
+
+	for {
+		select {
+		case <-ctx.Done():
+			logr.Info("shutting down metrics updater")
+			return
+		case <-ticker.C:
+			updateMetrics(ctx, logr, client)
+		}
+	}
+}
+
+// updateMetrics lists every DatabaseClaim and rebuilds the per-namespace claim gauges from that list.
+func updateMetrics(ctx context.Context, log logr.Logger, client client.Client) {
+	var databaseClaims v1.DatabaseClaimList
+	if err := client.List(ctx, &databaseClaims); err != nil {
+		// Return before any Reset so the previously published values stay in place.
+		log.Error(err, "unable to list database claims")
+		return
+	}
+
+	// Reset every gauge the loop increments; a gauge left out would keep growing on each tick.
+	metrics.TotalDatabaseClaims.Reset()
+	metrics.ErrorStateClaims.Reset()
+	metrics.MigrationStateClaims.Reset()
+	metrics.ActiveDBState.Reset()
+	metrics.ExistingSourceClaims.Reset()
+
+	for _, dbClaim := range databaseClaims.Items {
+		metrics.TotalDatabaseClaims.WithLabelValues(dbClaim.Namespace).Inc()
+		if dbClaim.Status.Error != "" {
+			metrics.ErrorStateClaims.WithLabelValues(dbClaim.Namespace).Inc()
+		}
+		if dbClaim.Status.MigrationState != "" {
+			metrics.MigrationStateClaims.WithLabelValues(dbClaim.Namespace, dbClaim.Status.MigrationState).Inc()
+		}
+		if dbClaim.Status.ActiveDB.DbState != "" {
+			metrics.ActiveDBState.WithLabelValues(dbClaim.Namespace, string(dbClaim.Status.ActiveDB.DbState)).Inc()
+		}
+		// A nil UseExistingSource is counted under "false".
+		if dbClaim.Spec.UseExistingSource != nil && *dbClaim.Spec.UseExistingSource {
+			metrics.ExistingSourceClaims.WithLabelValues(dbClaim.Namespace, "true").Inc()
+		} else {
+			metrics.ExistingSourceClaims.WithLabelValues(dbClaim.Namespace, "false").Inc()
+		}
+	}
+}
diff --git a/pkg/databaseclaim/databaseclaim.go b/pkg/databaseclaim/databaseclaim.go index 587f2118..ead2665e 100644 --- a/pkg/databaseclaim/databaseclaim.go +++ b/pkg/databaseclaim/databaseclaim.go @@ -131,13 +131,6 @@ func (r *DatabaseClaimReconciler) Reconcile(ctx context.Context, req ctrl.Reques } } - // Track if the claim is using an existing source.
- if dbClaim.Spec.UseExistingSource != nil && *dbClaim.Spec.UseExistingSource { - metrics.ExistingSourceClaims.WithLabelValues("true").Inc() - } else { - metrics.ExistingSourceClaims.WithLabelValues("false").Inc() - } - // Avoid updates to the claim until we know we should be looking at it if !isClassPermitted(r.Config.Class, dbClaim.Spec.Class) { @@ -147,9 +140,6 @@ func (r *DatabaseClaimReconciler) Reconcile(ctx context.Context, req ctrl.Reques if err := validateDBClaim(&dbClaim); err != nil { res, err := r.manageError(ctx, &dbClaim, err) - if dbClaim.Status.Error != "" { - metrics.ErrorStateClaims.Inc() - } // TerminalError, do not requeue return res, reconcile.TerminalError(err) } @@ -169,19 +159,13 @@ func (r *DatabaseClaimReconciler) Reconcile(ctx context.Context, req ctrl.Reques reqInfo, err := NewRequestInfo(ctx, r.Config.Viper, &dbClaim) if err != nil { - res, err := r.manageError(ctx, &dbClaim, err) - if dbClaim.Status.Error != "" { - metrics.ErrorStateClaims.Inc() - } - return res, err + return r.manageError(ctx, &dbClaim, err) } // name of our custom finalizer dbFinalizerName := "databaseclaims.persistance.atlas.infoblox.com/finalizer" if !dbClaim.ObjectMeta.DeletionTimestamp.IsZero() { - // Increment total claims count. - metrics.TotalDatabaseClaimsDeleted.Inc() // The object is being deleted if controllerutil.ContainsFinalizer(&dbClaim, dbFinalizerName) { @@ -238,29 +222,9 @@ func (r *DatabaseClaimReconciler) Reconcile(ctx context.Context, req ctrl.Reques } res, err := r.executeDbClaimRequest(ctx, &reqInfo, &dbClaim) if err != nil { - if dbClaim.Status.Error != "" { - metrics.ErrorStateClaims.Inc() - } return res, err } - // Track migration state. - if dbClaim.Status.MigrationState != "" { - metrics.MigrationStateClaims.WithLabelValues(dbClaim.Status.MigrationState).Inc() - } - - // Track active DB state. 
- if dbClaim.Status.ActiveDB.DbState != "" { - metrics.ActiveDBState.WithLabelValues(string(dbClaim.Status.ActiveDB.DbState)).Inc() - } - - var databaseClaims v1.DatabaseClaimList - if err := r.List(ctx, &databaseClaims); err != nil { - logr.Error(err, "unable to list database claims") - } else { - metrics.TotalDatabaseClaims.WithLabelValues("total_claims").Set(float64(len(databaseClaims.Items))) - } - return res, nil } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index a8eb00f7..8acf7b44 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -87,40 +87,35 @@ var ( Name: "dbcontroller_total_database_claims", Help: "Total number of database claims", }, - []string{"total_claims"}, - ) - TotalDatabaseClaimsDeleted = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "dbcontroller_total_database_claims_deleted", - Help: "Total number of database claims marked for deletion", - }, + []string{"namespace"}, ) ExistingSourceClaims = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "dbcontroller_existing_source_claims", Help: "Number of database claims using existing source", }, - []string{"use_existing_source"}, + []string{"namespace", "use_existing_source"}, ) - ErrorStateClaims = prometheus.NewGauge( + ErrorStateClaims = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "dbcontroller_error_state_claims", Help: "Number of database claims in error state", }, + []string{"namespace"}, ) MigrationStateClaims = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "dbcontroller_migration_state_claims", Help: "Number of database claims in each migration state", }, - []string{"migration_state"}, + []string{"namespace", "migration_state"}, ) ActiveDBState = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Name: "dbcontroller_active_db_state", Help: "State of active databases", }, - []string{"db_state"}, + []string{"namespace", "db_state"}, ) )