Transfer leadership when establishLeadership fails #5247
```diff
@@ -77,7 +77,10 @@ func (s *Server) monitorLeadership() {
 			leaderLoop.Add(1)
 			go func(ch chan struct{}) {
 				defer leaderLoop.Done()
-				s.leaderLoop(ch)
+				err := s.leaderLoop(ch)
+				if err != nil {
+					s.leadershipTransfer()
+				}
 			}(weAreLeaderCh)
 			s.logger.Printf("[INFO] consul: cluster leadership acquired")
 
```
```diff
@@ -126,9 +129,23 @@ func (s *Server) monitorLeadership() {
 	}
 }
 
+func (s *Server) leadershipTransfer() {
+	retryCount := 3
+	for i := 0; i < retryCount; i++ {
+		future := s.raft.LeadershipTransfer()
+		if err := future.Error(); err != nil {
+			s.logger.Printf("[ERR] consul: failed to transfer leadership attempt %d/%d: %v", i, retryCount, err)
+		} else {
+			s.logger.Printf("[INFO] consul: successfully transferred leadership attempt %d/%d", i, retryCount)
+			break
+		}
+
+	}
+}
+
 // leaderLoop runs as long as we are the leader to run various
 // maintenance activities
-func (s *Server) leaderLoop(stopCh chan struct{}) {
+func (s *Server) leaderLoop(stopCh chan struct{}) error {
 	// Fire a user event indicating a new leader
 	payload := []byte(s.config.NodeName)
 	for name, segment := range s.LANSegments() {
```
```diff
@@ -178,7 +195,7 @@ RECONCILE:
 			if err := s.revokeLeadership(); err != nil {
 				s.logger.Printf("[ERR] consul: failed to revoke leadership: %v", err)
 			}
-			goto WAIT
+			return err
 		}
 		establishedLeader = true
 		defer func() {
```

Author, on the new `return err` line: This is the first use case: we couldn't establish leadership, and instead of retrying it after …
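To see the whole pattern in one place, here is a minimal, runnable sketch of what the change amounts to: `leaderLoop` now returns an error, and the monitoring goroutine calls a transfer helper when it does. The `server` type and the always-failing `establishLeadership` stub below are simplified stand-ins for illustration, not Consul's actual implementation.

```go
package main

import (
	"errors"
	"fmt"
)

// server is a simplified stand-in for Consul's *Server; the real type
// carries raft, serf, the logger, and much more.
type server struct{}

// establishLeadership always fails here to demonstrate the error path.
func (s *server) establishLeadership() error {
	return errors.New("barrier timed out")
}

// leaderLoop mirrors the new shape of the real function: it returns an
// error instead of silently looping when leadership cannot be established.
func (s *server) leaderLoop(stopCh chan struct{}) error {
	if err := s.establishLeadership(); err != nil {
		return fmt.Errorf("failed to establish leadership: %v", err)
	}
	<-stopCh
	return nil
}

// leadershipTransfer stands in for the retrying wrapper around
// raft.LeadershipTransfer() added in this PR.
func (s *server) leadershipTransfer() {
	fmt.Println("transferring leadership to another server")
}

func main() {
	s := &server{}
	stopCh := make(chan struct{})
	done := make(chan struct{})

	// monitorLeadership equivalent: run the leader loop in a goroutine
	// and, if it exits with an error, hand leadership away.
	go func() {
		defer close(done)
		if err := s.leaderLoop(stopCh); err != nil {
			fmt.Println("leader loop failed:", err)
			s.leadershipTransfer()
		}
	}()

	<-done
}
```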
```diff
@@ -204,7 +221,7 @@ WAIT:
 	// down.
 	select {
 	case <-stopCh:
-		return
+		return nil
 	default:
 	}
 
```
```diff
@@ -213,17 +230,21 @@ WAIT:
 	for {
 		select {
 		case <-stopCh:
-			return
+			return nil
 		case <-s.shutdownCh:
-			return
+			return nil
 		case <-interval:
 			goto RECONCILE
 		case member := <-reconcileCh:
 			s.reconcileMember(member)
 		case index := <-s.tombstoneGC.ExpireCh():
 			go s.reapTombstones(index)
 		case errCh := <-s.reassertLeaderCh:
-			errCh <- reassert()
+			err := reassert()
+			errCh <- err
+			if err != nil {
+				return err
+			}
 		}
 	}
 }
```

Author, on the new `return err`: This is the second use case: we couldn't reassert, and instead of retrying it after …

Reviewer: Hmmm, when does this happen? I don't recall the specifics and wonder if by failing and stepping down we might end up causing new leadership stability issues?

Author (referring to lines 245 to 246 in 09a1a32): This happens after snapshot restore. Since the agent revokes leadership and immediately tries to establish it again, there is the possibility that it fails. When it does, we are in the same situation as above: raft thinks this agent is a leader, but Consul disagrees.
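For context on where a reassert error can come from, here is a rough, runnable sketch of the shape of that step, using simplified `revokeLeadership`/`establishLeadership` stubs (the real methods do far more). As the comment above explains, after a snapshot restore the agent revokes leadership and immediately tries to re-establish it; with this change a failure there now propagates out of `leaderLoop` instead of leaving raft and Consul disagreeing.

```go
package main

import (
	"errors"
	"fmt"
)

// Simplified stand-ins for the real server methods; the real ones touch
// ACLs, autopilot, the tombstone GC, and more.
type server struct{ establishedLeader bool }

func (s *server) revokeLeadership() error { return nil }

func (s *server) establishLeadership() error {
	return errors.New("failed right after snapshot restore")
}

func main() {
	s := &server{establishedLeader: true}

	// Approximate shape of the reassert step inside leaderLoop:
	// tear leadership state down and rebuild it.
	reassert := func() error {
		if !s.establishedLeader {
			return fmt.Errorf("leadership has not been established")
		}
		if err := s.revokeLeadership(); err != nil {
			return err
		}
		return s.establishLeadership()
	}

	// With this change, an error from reassert is sent back on
	// reassertLeaderCh *and* returned from leaderLoop, so the server
	// stops acting as leader instead of disagreeing with raft.
	if err := reassert(); err != nil {
		fmt.Println("reassert failed, leaderLoop would now return:", err)
	}
}
```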
```diff
@@ -31,6 +31,7 @@ import (
 	"github.com/hashicorp/consul/sentinel"
 	"github.com/hashicorp/consul/tlsutil"
 	"github.com/hashicorp/consul/types"
+	"github.com/hashicorp/go-hclog"
 	"github.com/hashicorp/raft"
 	raftboltdb "github.com/hashicorp/raft-boltdb"
 	"github.com/hashicorp/serf/serf"
```
```diff
@@ -540,7 +541,12 @@ func (s *Server) setupRaft() error {
 
 	// Make sure we set the LogOutput.
 	s.config.RaftConfig.LogOutput = s.config.LogOutput
-	s.config.RaftConfig.Logger = s.logger
+	raftLogger := hclog.New(&hclog.LoggerOptions{
+		Name:   "raft",
+		Level:  hclog.LevelFromString(s.config.LogLevel),
+		Output: s.config.LogOutput,
+	})
+	s.config.RaftConfig.Logger = raftLogger
 
 	// Versions of the Raft protocol below 3 require the LocalID to match the network
 	// address of the transport.
```

Author: Raft logging changed and we need to provide the hclogger now.
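For readers who have not used go-hclog before, here is a small standalone example of the kind of logger raft now expects, mirroring the options used in the diff above; the log call at the end is purely illustrative.

```go
package main

import (
	"os"

	"github.com/hashicorp/go-hclog"
)

func main() {
	// Build a named, leveled logger the same way the diff does for raft.
	raftLogger := hclog.New(&hclog.LoggerOptions{
		Name:   "raft",
		Level:  hclog.LevelFromString("INFO"),
		Output: os.Stderr,
	})

	// hclog uses structured key/value pairs rather than Printf formatting.
	raftLogger.Info("leadership transfer", "attempt", 1, "retry_limit", 3)
}
```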
Reviewer: What if leadership transfer fails? We exit the leader loop anyway and no longer act as the leader, but does raft still think we are the leader? That seems like a bad case to be in - roughly the same as the bug this is meant to be fixing, although we at least made an attempt at telling raft we didn't want to be leader any more. I can think of a couple of possible options here: …
Author: I made a couple of changes to my PR, so that `leaderLoop` is only left if `leadershipTransfer` was successful. I think that mitigates the issues you are talking about. What happens now is that in case `leadershipTransfer` fails, the agent stays in `leaderLoop` and waits until `ReconcileInterval` to retry `establishLeadership` (and to transfer leadership in case it fails).
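A runnable sketch of the behaviour described above, under the assumption that the transfer step reports success or failure (which the commits shown here do not include yet): the loop is only left once the transfer succeeds; otherwise the server keeps waiting and retries establishing leadership after the interval.

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

// leadershipTransfer is a stand-in that always fails, so the loop never
// exits via the transfer path in this example.
func leadershipTransfer() error { return errors.New("no voter available") }

// establishLeadership is a stand-in that succeeds on the third attempt.
func establishLeadership(attempt int) error {
	if attempt < 2 {
		return errors.New("barrier timed out")
	}
	return nil
}

func main() {
	reconcileInterval := 50 * time.Millisecond // stands in for ReconcileInterval

	for attempt := 0; ; attempt++ {
		if err := establishLeadership(attempt); err != nil {
			fmt.Println("establish failed:", err)
			// Only leave the loop if leadership was handed off; otherwise
			// stay in the loop and retry after the interval.
			if terr := leadershipTransfer(); terr == nil {
				fmt.Println("leadership transferred, leaving loop")
				return
			}
			time.Sleep(reconcileInterval)
			continue
		}
		fmt.Println("leadership established on attempt", attempt)
		return
	}
}
```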
Reviewer: I think this is OK, although `ReconcileInterval` is 60 seconds, which seems a lot to wait around for when we know the cluster is down. 🤔 Should we instead consider looping indefinitely, retrying every 5 seconds or something? I think this is way better than before and eventually it should recover, so it's good; just wondering if it's easy to make that better quicker.
Author: I reset the `interval` to 5 seconds when transfer leadership fails so that it retries `establishLeadership` again faster than before.
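A small sketch of one way that 5-second retry could look, assuming the `interval` timer from `leaderLoop`'s WAIT block; the exact wiring is not part of the commit range shown here.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// In leaderLoop the WAIT block selects on an interval timer that
	// normally fires after the reconcile interval (60s by default).
	interval := time.After(60 * time.Second)

	transferFailed := true // pretend leadershipTransfer just failed
	if transferFailed {
		// Rearm the timer so the next pass through RECONCILE, and with it
		// another establishLeadership attempt, happens after ~5 seconds.
		interval = time.After(5 * time.Second)
	}

	<-interval
	fmt.Println("retrying establishLeadership now")
}
```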