Skip to content

Commit

Permalink
Improve: fix: delay election if a greater last log id is seen
Browse files Browse the repository at this point in the history
If this node sees a greater last-log-id on another node, it will be less
likely to be elected as a leader.
In this case, it is necessary to sleep for a longer period of time
`smaller_log_timeout` so that other nodes with a greater last-log-id
have a chance to elect themselves.

Fix: such as state should be kept until next election, i.e., it should
be a field of `Engine` instead of a `field` of `internal_server_state`.
And this value should be greater than the `election_timeout` of every other node.
  • Loading branch information
drmingdrmer committed Mar 14, 2023
1 parent ffa876c commit 54aea8a
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 50 deletions.
10 changes: 3 additions & 7 deletions openraft/src/core/raft_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1173,10 +1173,8 @@ impl<C: RaftTypeConfig, N: RaftNetworkFactory<C>, S: RaftStorage<C>> RaftCore<C,
timer_config.election_timeout
};

if let Some(l) = self.engine.internal_server_state.leading() {
if l.is_there_greater_log() {
election_timeout += timer_config.election_timeout;
}
if self.engine.is_there_greater_log() {
election_timeout += timer_config.election_timeout;
}

tracing::debug!(
Expand All @@ -1197,9 +1195,7 @@ impl<C: RaftTypeConfig, N: RaftNetworkFactory<C>, S: RaftStorage<C>> RaftCore<C,
}

// Every time elect, reset this flag.
if let Some(l) = self.engine.internal_server_state.leading_mut() {
l.reset_greater_log();
}
self.engine.reset_greater_log();

tracing::info!("do trigger election");
self.engine.elect();
Expand Down
47 changes: 31 additions & 16 deletions openraft/src/engine/engine_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ impl<NID: NodeId> EngineConfig<NID> {
max_payload_entries: config.max_payload_entries,
timer_config: time_state::Config {
election_timeout,
smaller_log_timeout: Duration::from_millis(config.election_timeout_max),
leader_lease: Duration::from_millis(config.election_timeout_max),
},
}
Expand Down Expand Up @@ -132,6 +133,13 @@ where
/// The state of this raft node.
pub(crate) state: Valid<RaftState<NID, N>>,

// TODO: add a Voting state as a container.
/// Whether a greater log id is seen during election.
///
/// If it is true, then this node **may** not become a leader therefore the election timeout
/// should be greater.
pub(crate) seen_greater_log: bool,

pub(crate) timer: TimeState,

/// The internal server state used by Engine.
Expand All @@ -151,6 +159,7 @@ where
Self {
config,
state: Valid::new(init_state),
seen_greater_log: false,
timer: time_state::TimeState::new(now),
internal_server_state: InternalServerState::default(),
output: EngineOutput::default(),
Expand Down Expand Up @@ -421,24 +430,14 @@ where
debug_assert!(self.state.membership_state.effective().is_voter(&self.config.id));

// If peer's vote is greater than current vote, revert to follower state.
if &resp.vote > self.state.vote_ref() {
self.state.vote.update(*self.timer.now(), resp.vote);
self.output.push_command(Command::SaveVote {
vote: *self.state.vote_ref(),
});
}

// Seen a higher log.
//
// Explicitly ignore the returned error:
// resp.vote being not greater than mine is all right.
let _ = self.vote_handler().handle_message_vote(&resp.vote);

// Seen a higher log. Record it so that the next election will be delayed for a while.
if resp.last_log_id.as_ref() > self.state.last_log_id() {
match self.internal_server_state.leading_mut() {
None => {
unreachable!(
"when a vote resp is received, and the vote did not change, it has to be in a electing state"
)
}
Some(l) => l.set_greater_log(),
}
self.set_greater_log();
}
}

Expand Down Expand Up @@ -591,6 +590,22 @@ where
}
}

pub(crate) fn is_there_greater_log(&self) -> bool {
self.seen_greater_log
}

/// Set that there is greater last log id found.
///
/// In such a case, this node should not try to elect aggressively.
pub(crate) fn set_greater_log(&mut self) {
self.seen_greater_log = true;
}

/// Clear the flag of that there is greater last log id.
pub(crate) fn reset_greater_log(&mut self) {
self.seen_greater_log = false;
}

// Only used by tests
#[allow(dead_code)]
pub(crate) fn calc_server_state(&self) -> ServerState {
Expand Down
7 changes: 4 additions & 3 deletions openraft/src/engine/handle_vote_resp_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ fn test_handle_vote_resp() -> anyhow::Result<()> {
assert!(eng.output.commands.is_empty());
}

tracing::info!("--- seen a higher vote. keep trying in candidate state");
// TODO: when seeing a higher vote, keep trying until a majority of higher votes are seen.
tracing::info!("--- seen a higher vote. revert to follower");
{
let mut eng = eng();
eng.config.id = 1;
Expand All @@ -126,9 +127,9 @@ fn test_handle_vote_resp() -> anyhow::Result<()> {
});

assert_eq!(Vote::new(3, 2), *eng.state.vote_ref());
assert!(eng.internal_server_state.is_leading());
assert!(eng.internal_server_state.is_following());

assert_eq!(ServerState::Candidate, eng.state.server_state);
assert_eq!(ServerState::Follower, eng.state.server_state);
assert_eq!(
MetricsChangeFlags {
replication: false,
Expand Down
9 changes: 9 additions & 0 deletions openraft/src/engine/time_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ pub(crate) struct Config {
/// expired.
pub(crate) election_timeout: Duration,

/// If this node has a smaller last-log-id than others, it will be less likely to be elected as
/// a leader. In this case, it is necessary to sleep for a longer period of time
/// `smaller_log_timeout` so that other nodes with a greater last-log-id have a chance to elect
/// themselves.
///
/// Note that this value should be greater than the `election_timeout` of every other node.
pub(crate) smaller_log_timeout: Duration,

/// The duration of an active leader's lease.
///
/// When a follower or learner perceives an active leader, such as by receiving an AppendEntries
Expand All @@ -20,6 +28,7 @@ impl Default for Config {
fn default() -> Self {
Self {
election_timeout: Duration::from_millis(150),
smaller_log_timeout: Duration::from_millis(200),
leader_lease: Duration::from_millis(150),
}
}
Expand Down
23 changes: 0 additions & 23 deletions openraft/src/leader/leader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,6 @@ pub(crate) struct Leader<NID: NodeId, QS: QuorumSet<NID>> {
/// The vote this leader works in.
pub(crate) vote: Vote<NID>,

/// Whether a greater log id is seen during election.
///
/// If it is true, then this node **may** not become a leader therefore the election timeout
/// should be greater.
seen_greater_log: bool,

/// Which nodes have granted the the vote of this node.
pub(crate) vote_granted_by: BTreeSet<NID>,

Expand All @@ -55,7 +49,6 @@ where
) -> Self {
Self {
vote,
seen_greater_log: false,
vote_granted_by: BTreeSet::new(),
progress: VecProgress::new(
quorum_set,
Expand All @@ -65,22 +58,6 @@ where
}
}

pub(crate) fn is_there_greater_log(&self) -> bool {
self.seen_greater_log
}

/// Set that there is greater last log id found.
///
/// In such a case, this node should not try to elect aggressively.
pub(crate) fn set_greater_log(&mut self) {
self.seen_greater_log = true;
}

/// Clear the flag of that there is greater last log id.
pub(crate) fn reset_greater_log(&mut self) {
self.seen_greater_log = false;
}

/// Update that a node has granted the vote.
pub(crate) fn grant_vote_by(&mut self, target: NID) {
self.vote_granted_by.insert(target);
Expand Down
2 changes: 1 addition & 1 deletion openraft/tests/elect/t10_elect_compare_last_log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::fixtures::RaftRouter;

/// The last_log in a vote request must be greater or equal than the local one.
///
/// - Fake a cluster with two node: with last log {2,1} and {1,2}.
/// - Fake a cluster with two node: with last log {2,1} and {1,1}.
/// - Bring up the cluster and only node 0 can become leader.
#[async_entry::test(worker_threads = 8, init = "init_default_ut_tracing()", tracing_span = "debug")]
async fn elect_compare_last_log() -> Result<()> {
Expand Down

0 comments on commit 54aea8a

Please sign in to comment.