From 7bba4911cf057f118b3100448538003819fa83d9 Mon Sep 17 00:00:00 2001 From: Dmitri Date: Tue, 22 Jun 2021 20:35:46 +0800 Subject: [PATCH] Observe resumed heartbeat after failures (#467) --- observer.go | 5 +++++ replication.go | 3 +++ 2 files changed, 8 insertions(+) diff --git a/observer.go b/observer.go index 7b3c03cd602..29f2d580298 100644 --- a/observer.go +++ b/observer.go @@ -34,6 +34,11 @@ type FailedHeartbeatObservation struct { LastContact time.Time } +// ResumedHeartbeatObservation is sent when a node resumes heartbeating with the leader after one or more failed heartbeats. +type ResumedHeartbeatObservation struct { + PeerID ServerID +} + // nextObserverId is used to provide a unique ID for each observer to aid in // deregistration. var nextObserverID uint64 diff --git a/replication.go b/replication.go index 5b5d8fa12b7..c11e8e56144 100644 --- a/replication.go +++ b/replication.go @@ -388,6 +388,9 @@ func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) { case <-stopCh: } } else { + if failures > 0 { + r.observe(ResumedHeartbeatObservation{PeerID: s.peer.ID}) + } s.setLastContact() failures = 0 labels := []metrics.Label{{Name: "peer_id", Value: string(s.peer.ID)}}