Skip to content

Commit

Permalink
Feature: add ChangeMembers::SetNodes (#876)
Browse files Browse the repository at this point in the history
During dynamic cluster changes, we sometimes need to update an existing
node, for example changing its network address.

Adding `SetNodes` variant to `ChangeMembers` allows replacing an
existing node directly.
However, this also carries risk of creating a split brain scenario if
used incorrectly.

- Fix: #875
  • Loading branch information
drmingdrmer authored Jun 22, 2023
1 parent 6098f5c commit d629dbe
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 1 deletion.
15 changes: 15 additions & 0 deletions openraft/src/change_members.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,23 @@ pub enum ChangeMembers<NID: NodeId, N: Node> {
ReplaceAllVoters(BTreeSet<NID>),

/// Add nodes to membership, as learners.
///
/// it **WONT** replace existing node.
///
/// Prefer using this variant instead of `SetNodes` whenever possible, as `AddNodes` ensures
/// safety, whereas incorrect usage of `SetNodes` can result in a brain split.
/// See: [Update-Node](`crate::docs::cluster_control::dynamic_membership#update-node`)
AddNodes(BTreeMap<NID, N>),

/// Add or replace nodes in membership config.
///
/// it **WILL** replace existing node.
///
/// Prefer using `AddNodes` instead of `SetNodes` whenever possible, as `AddNodes` ensures
/// safety, whereas incorrect usage of `SetNodes` can result in a brain split.
/// See: [Update-Node](`crate::docs::cluster_control::dynamic_membership#update-node`)
SetNodes(BTreeMap<NID, N>),

/// Remove nodes from membership.
///
/// If a node is still a voter, it returns
Expand Down
45 changes: 45 additions & 0 deletions openraft/src/docs/cluster_control/dynamic-membership.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,51 @@ tolerates a minority member crash.

To read more about Openraft's [Extended Membership Algorithm][`extended_membership`].


## Update Node

To update a node, such as altering its network address,
the application calls [`Raft::change_membership()`][].
The initial argument should be set to [`ChangeMembers::SetNodes(BTreeMap<NodeId,Node>)`][`ChangeMembers::SetNodes`].

**Warning: Misusing `SetNodes` could lead to a split-brain situation**:

### Brain split

When Updating node network addresses,
brain split could occur if the new address belongs to another node,
leading to two elected leaders.

Consider a 3-node cluster (`a, b, c`, with addresses `x, y, z`) and an
uninitialized node `d` with address `w`:

```text
a: x
b: y
c: z
d: w
```

Mistakenly updating `b`'s address from `y` to `w` would enable both `x, y` and `z, w` to form quorums and elect leaders:

- `c` proposes ChangeMembership: `{a:x, b:w, c:z}`;
- `c, d` grant `c`;

- `c` elects itself as leader
- `c, d` confirm `c` as leader

- `a` elects itself as leader
- `a, b` confirm `a` as leader


Directly updating node addresses with `ChangeMembers::SetNodes`
should be replaced with `ChangeMembers::RemoveNodes` and `ChangeMembers::RemoveNodes` whenever possible.

Do not use `ChangeMembers::SetNodes` unless you know what you are doing.


[`ChangeMembers::SetNodes`]: `crate::change_members::ChangeMembers::SetNodes`
[`Raft::add_learner()`]: `crate::Raft::add_learner`
[`Raft::change_membership()`]: `crate::Raft::change_membership`
[`extended_membership`]: `crate::docs::data::extended_membership`
23 changes: 23 additions & 0 deletions openraft/src/membership/membership.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,12 @@ where
}
self
}
ChangeMembers::SetNodes(set_nodes) => {
for (node_id, node) in set_nodes.into_iter() {
self.nodes.insert(node_id, node);
}
self
}
ChangeMembers::RemoveNodes(remove_node_ids) => {
for node_id in remove_node_ids.iter() {
self.nodes.remove(node_id);
Expand Down Expand Up @@ -517,6 +523,23 @@ mod tests {
);
}

// SetNodes: Ok
{
let m = || Membership::<u64, u64> {
configs: vec![btreeset! {1,2}],
nodes: btreemap! {1=>1,2=>2,3=>3},
};

let res = m().change(ChangeMembers::SetNodes(btreemap! {3=>30, 4=>40}), false);
assert_eq!(
Ok(Membership::<u64, u64> {
configs: vec![btreeset! {1,2}],
nodes: btreemap! {1=>1,2=>2,3=>30, 4=>40}
}),
res
);
}

// RemoveNodes: can not remove node for voter
{
let res = m().change(ChangeMembers::RemoveNodes(btreeset! {2}), false);
Expand Down
24 changes: 24 additions & 0 deletions openraft/src/membership/membership_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,30 @@ fn test_membership_add_learner() -> anyhow::Result<()> {
Ok(())
}

#[test]
fn test_membership_update_nodes() -> anyhow::Result<()> {
let node = |s: &str| TestNode {
addr: s.to_string(),
data: Default::default(),
};

let m_1_2 = Membership::<u64, TestNode>::new_unchecked(
vec![btreeset! {1}, btreeset! {2}],
btreemap! {1=>node("1"), 2=>node("2")},
);

let m_1_2_3 = m_1_2.change(ChangeMembers::SetNodes(btreemap! {2=>node("20"), 3=>node("30")}), true)?;
assert_eq!(
Membership::<u64, TestNode>::new_unchecked(
vec![btreeset! {1}, btreeset! {2}],
btreemap! {1=>node("1"), 2=>node("20"), 3=>node("30")}
),
m_1_2_3
);

Ok(())
}

#[test]
fn test_membership_extend_nodes() -> anyhow::Result<()> {
let node = |s: &str| TestNode {
Expand Down
33 changes: 33 additions & 0 deletions tests/tests/membership/t11_add_learner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ use std::sync::Arc;
use std::time::Duration;

use anyhow::Result;
use maplit::btreemap;
use maplit::btreeset;
use openraft::error::ChangeMembershipError;
use openraft::error::ClientWriteError;
use openraft::error::InProgress;
use openraft::storage::RaftLogReaderExt;
use openraft::ChangeMembers;
use openraft::CommittedLeaderId;
use openraft::Config;
use openraft::LogId;
Expand Down Expand Up @@ -141,6 +143,37 @@ async fn add_learner_non_blocking() -> Result<()> {
Ok(())
}

#[async_entry::test(worker_threads = 8, init = "init_default_ut_tracing()", tracing_span = "debug")]
async fn add_learner_with_set_nodes() -> Result<()> {
// Add learners and update nodes with ChangeMembers::SetNodes
// Node updating is ensured by unit tests of ChangeMembers
let config = Arc::new(
Config {
replication_lag_threshold: 0,
enable_tick: false,
..Default::default()
}
.validate()?,
);
let mut router = RaftRouter::new(config.clone());

let log_index = router.new_cluster(btreeset! {0,1,2}, btreeset! {}).await?;

tracing::info!(log_index, "--- set node 2 and 4");
{
router.new_raft_node(4).await;

let raft = router.get_raft_handle(&0)?;
raft.change_membership(ChangeMembers::SetNodes(btreemap! {2=>(), 4=>()}), true).await?;

let metrics = router.get_raft_handle(&0)?.metrics().borrow().clone();
let node_ids = metrics.membership_config.membership().nodes().map(|x| *x.0).collect::<Vec<_>>();
assert_eq!(vec![0, 1, 2, 4], node_ids);
}

Ok(())
}

/// When the previous membership is not yet committed, add-learner should fail.
///
/// Because adding learner is also a change-membership operation, a new membership config log will
Expand Down
2 changes: 1 addition & 1 deletion tests/tests/membership/t20_change_membership.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use openraft::ServerState;
use crate::fixtures::init_default_ut_tracing;
use crate::fixtures::RaftRouter;

/// When a change-membership log is committed, the membership_state should be updated.
/// When a change-membership log is committed, the `RaftState.membership_state` should be updated.
#[async_entry::test(worker_threads = 3, init = "init_default_ut_tracing()", tracing_span = "debug")]
async fn update_membership_state() -> anyhow::Result<()> {
let config = Arc::new(
Expand Down

0 comments on commit d629dbe

Please sign in to comment.