From 04c35d30360d477b9acee73c141afad6359ab8af Mon Sep 17 00:00:00 2001 From: Artem Gavrilov Date: Thu, 7 Sep 2023 11:54:04 +0200 Subject: [PATCH] PMM-12384 Backup artifact metrics (#2428) * PMM-12384 Add backup artifact metrics * PMM-12384 Fixes * PMM_12384 Fix * PMM-12384 Remove TODO * PMM-12384 Refactoring * Update managed/data/iatemplates/backup_error.yml --- dev/mongo-rs-backups/docker-compose.yml | 1 + managed/cmd/pmm-managed/main.go | 3 + managed/data/iatemplates/backup_error.yml | 12 +++ managed/services/backup/metrics.go | 113 ++++++++++++++++++++++ 4 files changed, 129 insertions(+) create mode 100644 managed/data/iatemplates/backup_error.yml create mode 100644 managed/services/backup/metrics.go diff --git a/dev/mongo-rs-backups/docker-compose.yml b/dev/mongo-rs-backups/docker-compose.yml index 458674b664..001be6355c 100644 --- a/dev/mongo-rs-backups/docker-compose.yml +++ b/dev/mongo-rs-backups/docker-compose.yml @@ -1,6 +1,7 @@ networks: pmm_default: name: pmm_default + external: true services: mongo1: diff --git a/managed/cmd/pmm-managed/main.go b/managed/cmd/pmm-managed/main.go index a27c774228..cd203b2e31 100644 --- a/managed/cmd/pmm-managed/main.go +++ b/managed/cmd/pmm-managed/main.go @@ -932,6 +932,9 @@ func main() { //nolint:cyclop,maintidx dbaasClient := dbaas.NewClient(*dbaasControllerAPIAddrF) compatibilityService := backup.NewCompatibilityService(db, versioner) backupService := backup.NewService(db, jobsService, agentService, compatibilityService, pbmPITRService) + backupMetricsCollector := backup.NewMetricsCollector(db) + prom.MustRegister(backupMetricsCollector) + schedulerService := scheduler.New(db, backupService) versionCache := versioncache.New(db, versioner) emailer := alertmanager.NewEmailer(logrus.WithField("component", "alertmanager-emailer").Logger) diff --git a/managed/data/iatemplates/backup_error.yml b/managed/data/iatemplates/backup_error.yml new file mode 100644 index 0000000000..592c109c9c --- /dev/null +++ 
b/managed/data/iatemplates/backup_error.yml @@ -0,0 +1,12 @@ +--- +templates: + - name: pmm_backup_error + version: 1 + summary: Backup failed (Tech preview) + expr: 'pmm_managed_backups_artifacts{status="error"} == bool 1' + for: 1m + severity: error + annotations: + description: |- + Failed to create a backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. + summary: Failed to create a backup artifact '{{ $labels.artifact_name}}' on service '{{ $labels.service_id }}'. diff --git a/managed/services/backup/metrics.go b/managed/services/backup/metrics.go new file mode 100644 index 0000000000..b97148a958 --- /dev/null +++ b/managed/services/backup/metrics.go @@ -0,0 +1,113 @@ +// Copyright (C) 2017 Percona LLC +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +package backup + +import ( + "context" + "time" + + "github.com/pkg/errors" + prom "github.com/prometheus/client_golang/prometheus" + "github.com/sirupsen/logrus" + "gopkg.in/reform.v1" + + "github.com/percona/pmm/managed/models" +) + +const ( + requestTimeout = 3 * time.Second + artifactExists float64 = 1 + prometheusNamespace = "pmm_managed" + prometheusSubsystem = "backups" +) + +type MetricsCollector struct { + db *reform.DB + l *logrus.Entry + + mArtifactsDesc *prom.Desc +} + +func NewMetricsCollector(db *reform.DB) *MetricsCollector { + return &MetricsCollector{ + db: db, + l: logrus.WithField("component", "backups/metrics"), + mArtifactsDesc: prom.NewDesc( + prom.BuildFQName(prometheusNamespace, prometheusSubsystem, "artifacts"), + "Artifacts", + []string{ + "artifact_id", "artifact_name", "artifact_vendor", "service_id", "service_name", + "type", "db_version", "data_model", "mode", "status", + }, + nil), + } +} + +func (c *MetricsCollector) Describe(ch chan<- *prom.Desc) { + ch <- c.mArtifactsDesc +} + +func (c *MetricsCollector) Collect(ch chan<- prom.Metric) { + ctx, cancelCtx := context.WithTimeout(context.Background(), requestTimeout) + defer cancelCtx() + + var artifacts []*models.Artifact + var services map[string]*models.Service + errTx := c.db.InTransactionContext(ctx, nil, func(t *reform.TX) error { + var err error + artifacts, err = models.FindArtifacts(t.Querier, models.ArtifactFilters{}) + if err != nil { + return errors.WithStack(err) + } + + serviceIDs := make([]string, 0, len(artifacts)) + for _, artifact := range artifacts { + serviceIDs = append(serviceIDs, artifact.ServiceID) + } + + services, err = models.FindServicesByIDs(t.Querier, serviceIDs) + if err != nil { + return errors.WithStack(err) + } + return nil + }) + if errTx != nil { + c.l.Warnf("Failed to get artifacts: %v", errTx) + return + } + + for _, artifact := range artifacts { + var serviceName string + if service, ok := services[artifact.ServiceID]; ok { + serviceName = 
service.ServiceName + } + + ch <- prom.MustNewConstMetric( + c.mArtifactsDesc, + prom.GaugeValue, + artifactExists, + artifact.ID, + artifact.Name, + artifact.Vendor, + artifact.ServiceID, + serviceName, + string(artifact.Type), + artifact.DBVersion, + string(artifact.DataModel), + string(artifact.Mode), + string(artifact.Status)) + } +}