From 57f85edf997b2ac5caf3720977898fbf90013f64 Mon Sep 17 00:00:00 2001
From: Zixiong Liu
Date: Mon, 7 Mar 2022 20:23:49 +0800
Subject: [PATCH] owner(cdc): clean up stale metrics (#4775)

close pingcap/tiflow#4774
---
 cdc/owner/owner.go | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/cdc/owner/owner.go b/cdc/owner/owner.go
index f3b0e843fd9..db24995c367 100644
--- a/cdc/owner/owner.go
+++ b/cdc/owner/owner.go
@@ -280,6 +280,7 @@ func (o *Owner) cleanUpChangefeed(state *orchestrator.ChangefeedReactorState) {
 // Bootstrap checks if the state contains incompatible or incorrect information and tries to fix it.
 func (o *Owner) Bootstrap(state *orchestrator.GlobalReactorState) {
 	log.Info("Start bootstrapping")
+	o.cleanStaleMetrics()
 	fixChangefeedInfos(state)
 }
 
@@ -298,6 +299,18 @@ func fixChangefeedInfos(state *orchestrator.GlobalReactorState) {
 	}
 }
 
+func (o *Owner) cleanStaleMetrics() {
+	// The gauge metrics of the Owner should be reset
+	// each time a new owner is launched, in case the previous owner
+	// has crashed and has not cleaned up the stale metrics values.
+	changefeedCheckpointTsGauge.Reset()
+	changefeedCheckpointTsLagGauge.Reset()
+	changefeedResolvedTsGauge.Reset()
+	changefeedResolvedTsLagGauge.Reset()
+	ownerMaintainTableNumGauge.Reset()
+	changefeedStatusGauge.Reset()
+}
+
 func (o *Owner) updateMetrics(state *orchestrator.GlobalReactorState) {
 	// Keep the value of prometheus expression `rate(counter)` = 1
 	// Please also change alert rule in ticdc.rules.yml when change the expression value.
@@ -305,9 +318,6 @@ func (o *Owner) updateMetrics(state *orchestrator.GlobalReactorState) {
 	ownershipCounter.Add(float64(now.Sub(o.lastTickTime)) / float64(time.Second))
 	o.lastTickTime = now
 
-	ownerMaintainTableNumGauge.Reset()
-	changefeedStatusGauge.Reset()
-
 	conf := config.GetGlobalServerConfig()
 
 	// TODO refactor this piece of code when the new scheduler is stabilized,