diff --git a/agent/consul/leader_connect.go b/agent/consul/leader_connect.go
index 1b724d2302ce..3a2a68ae1159 100644
--- a/agent/consul/leader_connect.go
+++ b/agent/consul/leader_connect.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/armon/go-metrics"
 	"golang.org/x/time/rate"
 
 	"github.com/hashicorp/consul/agent/connect/ca"
@@ -36,6 +37,7 @@ func (s *Server) startConnectLeader(ctx context.Context) error {
 
 	s.caManager.Start(ctx)
 	s.leaderRoutineManager.Start(ctx, caRootPruningRoutineName, s.runCARootPruning)
+	s.leaderRoutineManager.Start(ctx, caRootMetricRoutineName, emitCAExpirationMetrics(s))
 
 	return s.startIntentionConfigEntryMigration(ctx)
 }
@@ -139,6 +141,62 @@ func (s *Server) pruneCARoots() error {
 	return err
 }
 
+// emitCAExpirationMetrics returns a routine that, once per hour, publishes the
+// time remaining until the active root CA expires as a gauge under the key
+// {"mesh", "root-ca", "expiry"}, labelled with this server's datacenter.
+//
+// The returned function blocks until ctx is cancelled and always returns nil.
+// A failure to emit is logged and the routine keeps running, so a transient
+// problem does not stop future updates.
+func emitCAExpirationMetrics(s *Server) func(ctx context.Context) error {
+	key := []string{"mesh", "root-ca", "expiry"}
+	labels := []metrics.Label{
+		{Name: "datacenter", Value: s.config.Datacenter},
+	}
+
+	// emit sets the gauge once from the current contents of the state store.
+	// It is a no-op when Connect is disabled.
+	emit := func() error {
+		if !s.config.ConnectEnabled {
+			return nil
+		}
+
+		state := s.fsm.State()
+		_, root, err := state.CARootActive(nil)
+		if err != nil {
+			return fmt.Errorf("failed to retrieve root CA: %w", err)
+		}
+		// Guard against a nil root: dereferencing it below would panic the
+		// routine. NOTE(review): presumably CARootActive returns (nil, nil)
+		// when no root has been marked active yet - confirm against the
+		// state store.
+		if root == nil {
+			return fmt.Errorf("no active root CA")
+		}
+
+		// Dividing by time.Second truncates to whole seconds before the
+		// value is converted for the gauge.
+		expiry := time.Until(root.NotAfter) / time.Second
+		metrics.SetGaugeWithLabels(key, float32(expiry), labels)
+		return nil
+	}
+
+	return func(ctx context.Context) error {
+		// The first sample is taken one tick (an hour) after the routine starts.
+		ticker := time.NewTicker(time.Hour)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return nil
+			case <-ticker.C:
+				if err := emit(); err != nil {
+					s.loggers.
+						Named(logging.Connect).
+						Info("failed to emit root CA expiry metric", "error", err)
+				}
+			}
+		}
+	}
+}
+
 // retryLoopBackoff loops a given function indefinitely, backing off exponentially
 // upon errors up to a maximum of maxRetryBackoff seconds.
func retryLoopBackoff(ctx context.Context, loopFn func() error, errFn func(error)) { diff --git a/agent/consul/server.go b/agent/consul/server.go index a53916aa4881..b96230d7c08c 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -102,6 +102,7 @@ const ( aclTokenReapingRoutineName = "acl token reaping" aclUpgradeRoutineName = "legacy ACL token upgrade" caRootPruningRoutineName = "CA root pruning" + caRootMetricRoutineName = "CA root expiration metric" configReplicationRoutineName = "config entry replication" federationStateReplicationRoutineName = "federation state replication" federationStateAntiEntropyRoutineName = "federation state anti-entropy" diff --git a/website/content/docs/agent/telemetry.mdx b/website/content/docs/agent/telemetry.mdx index 90c3b9d2c8f3..09ee3b504c9e 100644 --- a/website/content/docs/agent/telemetry.mdx +++ b/website/content/docs/agent/telemetry.mdx @@ -458,6 +458,7 @@ These metrics give insight into the health of the cluster as a whole. | `consul.catalog.connect.query-tag.<service>.<tag>` | Increments for each connect-based catalog query for the given service with the given tag. | queries | counter | | `consul.catalog.connect.query-tags.<service>.<tags>` | Increments for each connect-based catalog query for the given service with the given tags. | queries | counter | | `consul.catalog.connect.not-found.<service>` | Increments for each connect-based catalog query where the given service could not be found. | queries | counter | +| `consul.mesh.root-ca.expiry` | The number of seconds until the root CA expires, updated every hour. | seconds | gauge | ## Connect Built-in Proxy Metrics