diff --git a/Cargo.toml b/Cargo.toml index 52ea6df84..98da263dd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ quick-error = "1.2.2" rand = "0.5.4" hashbrown = "0.1" fail = { version = "0.2", optional = true } +getset = "0.0.6" [dev-dependencies] env_logger = "0.5" diff --git a/benches/benches.rs b/benches/benches.rs index 2746c63a0..ea2be50a0 100644 --- a/benches/benches.rs +++ b/benches/benches.rs @@ -1,5 +1,5 @@ #![allow(dead_code)] // Due to criterion we need this to avoid warnings. -#![cfg_attr(feature = "cargo-clippy", allow(let_and_return))] // Benches often artificially return values. Allow it. +#![cfg_attr(feature = "cargo-clippy", allow(clippy::let_and_return))] // Benches often artificially return values. Allow it. extern crate criterion; extern crate env_logger; diff --git a/benches/suites/progress_set.rs b/benches/suites/progress_set.rs index 6742e1c4b..3c7b4bcc0 100644 --- a/benches/suites/progress_set.rs +++ b/benches/suites/progress_set.rs @@ -11,7 +11,8 @@ pub fn bench_progress_set(c: &mut Criterion) { bench_progress_set_remove(c); bench_progress_set_iter(c); bench_progress_set_get(c); - bench_progress_set_nodes(c); + bench_progress_set_voters(c); + bench_progress_set_learners(c); } fn quick_progress_set(voters: usize, learners: usize) -> ProgressSet { @@ -146,14 +147,40 @@ pub fn bench_progress_set_iter(c: &mut Criterion) { }); } -pub fn bench_progress_set_nodes(c: &mut Criterion) { +pub fn bench_progress_set_voters(c: &mut Criterion) { let bench = |voters, learners| { move |b: &mut Bencher| { let set = quick_progress_set(voters, learners); b.iter(|| { let set = set.clone(); - let agg = set.iter().all(|_| true); - agg + let sum = set.voters().fold(0, |mut sum, _| { + sum += 1; + sum + }); + sum + }); + } + }; + + DEFAULT_RAFT_SETS.iter().for_each(|(voters, learners)| { + c.bench_function( + &format!("ProgressSet::voters ({}, {})", voters, learners), + bench(*voters, *learners), + ); + }); +} + +pub fn bench_progress_set_learners(c: &mut 
Criterion) { + let bench = |voters, learners| { + move |b: &mut Bencher| { + let set = quick_progress_set(voters, learners); + b.iter(|| { + let set = set.clone(); + let sum = set.learners().fold(0, |mut sum, _| { + sum += 1; + sum + }); + sum }); } }; diff --git a/benches/suites/raft.rs b/benches/suites/raft.rs index 934d8a795..67214f30e 100644 --- a/benches/suites/raft.rs +++ b/benches/suites/raft.rs @@ -13,10 +13,10 @@ fn quick_raft(voters: usize, learners: usize) -> Raft { let config = Config::new(id); let mut raft = Raft::new(&config, storage).unwrap(); (0..voters).for_each(|id| { - raft.add_node(id as u64); + raft.add_node(id as u64).unwrap(); }); (voters..learners).for_each(|id| { - raft.add_learner(id as u64); + raft.add_learner(id as u64).unwrap(); }); raft } diff --git a/benches/suites/raw_node.rs b/benches/suites/raw_node.rs index b657d208f..917e0ed79 100644 --- a/benches/suites/raw_node.rs +++ b/benches/suites/raw_node.rs @@ -10,8 +10,7 @@ fn quick_raw_node() -> RawNode { let peers = vec![]; let storage = MemStorage::default(); let config = Config::new(id); - let node = RawNode::new(&config, storage, peers).unwrap(); - node + RawNode::new(&config, storage, peers).unwrap() } pub fn bench_raw_node_new(c: &mut Criterion) { diff --git a/proto/eraftpb.proto b/proto/eraftpb.proto index e7c25f50c..664a4fea7 100644 --- a/proto/eraftpb.proto +++ b/proto/eraftpb.proto @@ -30,6 +30,8 @@ message Entry { message SnapshotMetadata { ConfState conf_state = 1; + ConfState pending_membership_change = 4; + uint64 pending_membership_change_index = 5; uint64 index = 2; uint64 term = 3; } @@ -91,11 +93,20 @@ enum ConfChangeType { AddNode = 0; RemoveNode = 1; AddLearnerNode = 2; + BeginMembershipChange = 3; + FinalizeMembershipChange = 4; } message ConfChange { uint64 id = 1; ConfChangeType change_type = 2; + // Used in `AddNode`, `RemoveNode`, and `AddLearnerNode`. uint64 node_id = 3; bytes context = 4; + // Used in `BeginMembershipChange` and `FinalizeMembershipChange`. 
+ ConfState configuration = 5; + // Used in `BeginMembershipChange` and `FinalizeMembershipChange`. + // Because `RawNode::apply_conf_change` takes a `ConfChange` instead of an `Entry` we must + // include this index so it can be known. + uint64 start_index = 6; } diff --git a/src/eraftpb.rs b/src/eraftpb.rs index d00f4cd89..dd3008ccc 100644 --- a/src/eraftpb.rs +++ b/src/eraftpb.rs @@ -366,6 +366,8 @@ impl ::protobuf::reflect::ProtobufValue for Entry { pub struct SnapshotMetadata { // message fields pub conf_state: ::protobuf::SingularPtrField, + pub pending_membership_change: ::protobuf::SingularPtrField, + pub pending_membership_change_index: u64, pub index: u64, pub term: u64, // special fields @@ -411,6 +413,54 @@ impl SnapshotMetadata { self.conf_state.as_ref().unwrap_or_else(|| ConfState::default_instance()) } + // .eraftpb.ConfState pending_membership_change = 4; + + pub fn clear_pending_membership_change(&mut self) { + self.pending_membership_change.clear(); + } + + pub fn has_pending_membership_change(&self) -> bool { + self.pending_membership_change.is_some() + } + + // Param is passed by value, moved + pub fn set_pending_membership_change(&mut self, v: ConfState) { + self.pending_membership_change = ::protobuf::SingularPtrField::some(v); + } + + // Mutable pointer to the field. + // If field is not initialized, it is initialized with default value first. 
+ pub fn mut_pending_membership_change(&mut self) -> &mut ConfState { + if self.pending_membership_change.is_none() { + self.pending_membership_change.set_default(); + } + self.pending_membership_change.as_mut().unwrap() + } + + // Take field + pub fn take_pending_membership_change(&mut self) -> ConfState { + self.pending_membership_change.take().unwrap_or_else(|| ConfState::new()) + } + + pub fn get_pending_membership_change(&self) -> &ConfState { + self.pending_membership_change.as_ref().unwrap_or_else(|| ConfState::default_instance()) + } + + // uint64 pending_membership_change_index = 5; + + pub fn clear_pending_membership_change_index(&mut self) { + self.pending_membership_change_index = 0; + } + + // Param is passed by value, moved + pub fn set_pending_membership_change_index(&mut self, v: u64) { + self.pending_membership_change_index = v; + } + + pub fn get_pending_membership_change_index(&self) -> u64 { + self.pending_membership_change_index + } + // uint64 index = 2; pub fn clear_index(&mut self) { @@ -449,6 +499,11 @@ impl ::protobuf::Message for SnapshotMetadata { return false; } }; + for v in &self.pending_membership_change { + if !v.is_initialized() { + return false; + } + }; true } @@ -459,6 +514,16 @@ impl ::protobuf::Message for SnapshotMetadata { 1 => { ::protobuf::rt::read_singular_message_into(wire_type, is, &mut self.conf_state)?; }, + 4 => { + ::protobuf::rt::read_singular_message_into(wire_type, is, &mut self.pending_membership_change)?; + }, + 5 => { + if wire_type != ::protobuf::wire_format::WireTypeVarint { + return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); + } + let tmp = is.read_uint64()?; + self.pending_membership_change_index = tmp; + }, 2 => { if wire_type != ::protobuf::wire_format::WireTypeVarint { return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); @@ -489,6 +554,13 @@ impl ::protobuf::Message for SnapshotMetadata { let len = v.compute_size(); my_size += 1 + 
::protobuf::rt::compute_raw_varint32_size(len) + len; } + if let Some(ref v) = self.pending_membership_change.as_ref() { + let len = v.compute_size(); + my_size += 1 + ::protobuf::rt::compute_raw_varint32_size(len) + len; + } + if self.pending_membership_change_index != 0 { + my_size += ::protobuf::rt::value_size(5, self.pending_membership_change_index, ::protobuf::wire_format::WireTypeVarint); + } if self.index != 0 { my_size += ::protobuf::rt::value_size(2, self.index, ::protobuf::wire_format::WireTypeVarint); } @@ -506,6 +578,14 @@ impl ::protobuf::Message for SnapshotMetadata { os.write_raw_varint32(v.get_cached_size())?; v.write_to_with_cached_sizes(os)?; } + if let Some(ref v) = self.pending_membership_change.as_ref() { + os.write_tag(4, ::protobuf::wire_format::WireTypeLengthDelimited)?; + os.write_raw_varint32(v.get_cached_size())?; + v.write_to_with_cached_sizes(os)?; + } + if self.pending_membership_change_index != 0 { + os.write_uint64(5, self.pending_membership_change_index)?; + } if self.index != 0 { os.write_uint64(2, self.index)?; } @@ -559,6 +639,16 @@ impl ::protobuf::Message for SnapshotMetadata { |m: &SnapshotMetadata| { &m.conf_state }, |m: &mut SnapshotMetadata| { &mut m.conf_state }, )); + fields.push(::protobuf::reflect::accessor::make_singular_ptr_field_accessor::<_, ::protobuf::types::ProtobufTypeMessage>( + "pending_membership_change", + |m: &SnapshotMetadata| { &m.pending_membership_change }, + |m: &mut SnapshotMetadata| { &mut m.pending_membership_change }, + )); + fields.push(::protobuf::reflect::accessor::make_simple_field_accessor::<_, ::protobuf::types::ProtobufTypeUint64>( + "pending_membership_change_index", + |m: &SnapshotMetadata| { &m.pending_membership_change_index }, + |m: &mut SnapshotMetadata| { &mut m.pending_membership_change_index }, + )); fields.push(::protobuf::reflect::accessor::make_simple_field_accessor::<_, ::protobuf::types::ProtobufTypeUint64>( "index", |m: &SnapshotMetadata| { &m.index }, @@ -592,6 +682,8 @@ impl 
::protobuf::Message for SnapshotMetadata { impl ::protobuf::Clear for SnapshotMetadata { fn clear(&mut self) { self.clear_conf_state(); + self.clear_pending_membership_change(); + self.clear_pending_membership_change_index(); self.clear_index(); self.clear_term(); self.unknown_fields.clear(); @@ -1846,6 +1938,8 @@ pub struct ConfChange { pub change_type: ConfChangeType, pub node_id: u64, pub context: ::std::vec::Vec, + pub configuration: ::protobuf::SingularPtrField, + pub start_index: u64, // special fields pub unknown_fields: ::protobuf::UnknownFields, pub cached_size: ::protobuf::CachedSize, @@ -1926,10 +2020,63 @@ impl ConfChange { pub fn get_context(&self) -> &[u8] { &self.context } + + // .eraftpb.ConfState configuration = 5; + + pub fn clear_configuration(&mut self) { + self.configuration.clear(); + } + + pub fn has_configuration(&self) -> bool { + self.configuration.is_some() + } + + // Param is passed by value, moved + pub fn set_configuration(&mut self, v: ConfState) { + self.configuration = ::protobuf::SingularPtrField::some(v); + } + + // Mutable pointer to the field. + // If field is not initialized, it is initialized with default value first. 
+ pub fn mut_configuration(&mut self) -> &mut ConfState { + if self.configuration.is_none() { + self.configuration.set_default(); + } + self.configuration.as_mut().unwrap() + } + + // Take field + pub fn take_configuration(&mut self) -> ConfState { + self.configuration.take().unwrap_or_else(|| ConfState::new()) + } + + pub fn get_configuration(&self) -> &ConfState { + self.configuration.as_ref().unwrap_or_else(|| ConfState::default_instance()) + } + + // uint64 start_index = 6; + + pub fn clear_start_index(&mut self) { + self.start_index = 0; + } + + // Param is passed by value, moved + pub fn set_start_index(&mut self, v: u64) { + self.start_index = v; + } + + pub fn get_start_index(&self) -> u64 { + self.start_index + } } impl ::protobuf::Message for ConfChange { fn is_initialized(&self) -> bool { + for v in &self.configuration { + if !v.is_initialized() { + return false; + } + }; true } @@ -1957,6 +2104,16 @@ impl ::protobuf::Message for ConfChange { 4 => { ::protobuf::rt::read_singular_proto3_bytes_into(wire_type, is, &mut self.context)?; }, + 5 => { + ::protobuf::rt::read_singular_message_into(wire_type, is, &mut self.configuration)?; + }, + 6 => { + if wire_type != ::protobuf::wire_format::WireTypeVarint { + return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); + } + let tmp = is.read_uint64()?; + self.start_index = tmp; + }, _ => { ::protobuf::rt::read_unknown_or_skip_group(field_number, wire_type, is, self.mut_unknown_fields())?; }, @@ -1981,6 +2138,13 @@ impl ::protobuf::Message for ConfChange { if !self.context.is_empty() { my_size += ::protobuf::rt::bytes_size(4, &self.context); } + if let Some(ref v) = self.configuration.as_ref() { + let len = v.compute_size(); + my_size += 1 + ::protobuf::rt::compute_raw_varint32_size(len) + len; + } + if self.start_index != 0 { + my_size += ::protobuf::rt::value_size(6, self.start_index, ::protobuf::wire_format::WireTypeVarint); + } my_size += 
::protobuf::rt::unknown_fields_size(self.get_unknown_fields()); self.cached_size.set(my_size); my_size @@ -1999,6 +2163,14 @@ impl ::protobuf::Message for ConfChange { if !self.context.is_empty() { os.write_bytes(4, &self.context)?; } + if let Some(ref v) = self.configuration.as_ref() { + os.write_tag(5, ::protobuf::wire_format::WireTypeLengthDelimited)?; + os.write_raw_varint32(v.get_cached_size())?; + v.write_to_with_cached_sizes(os)?; + } + if self.start_index != 0 { + os.write_uint64(6, self.start_index)?; + } os.write_unknown_fields(self.get_unknown_fields())?; ::std::result::Result::Ok(()) } @@ -2061,6 +2233,16 @@ impl ::protobuf::Message for ConfChange { |m: &ConfChange| { &m.context }, |m: &mut ConfChange| { &mut m.context }, )); + fields.push(::protobuf::reflect::accessor::make_singular_ptr_field_accessor::<_, ::protobuf::types::ProtobufTypeMessage>( + "configuration", + |m: &ConfChange| { &m.configuration }, + |m: &mut ConfChange| { &mut m.configuration }, + )); + fields.push(::protobuf::reflect::accessor::make_simple_field_accessor::<_, ::protobuf::types::ProtobufTypeUint64>( + "start_index", + |m: &ConfChange| { &m.start_index }, + |m: &mut ConfChange| { &mut m.start_index }, + )); ::protobuf::reflect::MessageDescriptor::new::( "ConfChange", fields, @@ -2087,6 +2269,8 @@ impl ::protobuf::Clear for ConfChange { self.clear_change_type(); self.clear_node_id(); self.clear_context(); + self.clear_configuration(); + self.clear_start_index(); self.unknown_fields.clear(); } } @@ -2269,6 +2453,8 @@ pub enum ConfChangeType { AddNode = 0, RemoveNode = 1, AddLearnerNode = 2, + BeginMembershipChange = 3, + FinalizeMembershipChange = 4, } impl ::protobuf::ProtobufEnum for ConfChangeType { @@ -2281,6 +2467,8 @@ impl ::protobuf::ProtobufEnum for ConfChangeType { 0 => ::std::option::Option::Some(ConfChangeType::AddNode), 1 => ::std::option::Option::Some(ConfChangeType::RemoveNode), 2 => ::std::option::Option::Some(ConfChangeType::AddLearnerNode), + 3 => 
::std::option::Option::Some(ConfChangeType::BeginMembershipChange), + 4 => ::std::option::Option::Some(ConfChangeType::FinalizeMembershipChange), _ => ::std::option::Option::None } } @@ -2290,6 +2478,8 @@ impl ::protobuf::ProtobufEnum for ConfChangeType { ConfChangeType::AddNode, ConfChangeType::RemoveNode, ConfChangeType::AddLearnerNode, + ConfChangeType::BeginMembershipChange, + ConfChangeType::FinalizeMembershipChange, ]; values } @@ -2328,16 +2518,19 @@ static file_descriptor_proto_data: &'static [u8] = b"\ \n\x04term\x18\x02\x20\x01(\x04R\x04term\x12\x14\n\x05index\x18\x03\x20\ \x01(\x04R\x05index\x12\x12\n\x04data\x18\x04\x20\x01(\x0cR\x04data\x12\ \x18\n\x07context\x18\x06\x20\x01(\x0cR\x07context\x12\x19\n\x08sync_log\ - \x18\x05\x20\x01(\x08R\x07syncLog\"o\n\x10SnapshotMetadata\x121\n\nconf_\ - state\x18\x01\x20\x01(\x0b2\x12.eraftpb.ConfStateR\tconfState\x12\x14\n\ - \x05index\x18\x02\x20\x01(\x04R\x05index\x12\x12\n\x04term\x18\x03\x20\ - \x01(\x04R\x04term\"U\n\x08Snapshot\x12\x12\n\x04data\x18\x01\x20\x01(\ - \x0cR\x04data\x125\n\x08metadata\x18\x02\x20\x01(\x0b2\x19.eraftpb.Snaps\ - hotMetadataR\x08metadata\"\xe7\x02\n\x07Message\x12/\n\x08msg_type\x18\ - \x01\x20\x01(\x0e2\x14.eraftpb.MessageTypeR\x07msgType\x12\x0e\n\x02to\ - \x18\x02\x20\x01(\x04R\x02to\x12\x12\n\x04from\x18\x03\x20\x01(\x04R\x04\ - from\x12\x12\n\x04term\x18\x04\x20\x01(\x04R\x04term\x12\x19\n\x08log_te\ - rm\x18\x05\x20\x01(\x04R\x07logTerm\x12\x14\n\x05index\x18\x06\x20\x01(\ + \x18\x05\x20\x01(\x08R\x07syncLog\"\x86\x02\n\x10SnapshotMetadata\x121\n\ + \nconf_state\x18\x01\x20\x01(\x0b2\x12.eraftpb.ConfStateR\tconfState\x12\ + N\n\x19pending_membership_change\x18\x04\x20\x01(\x0b2\x12.eraftpb.ConfS\ + tateR\x17pendingMembershipChange\x12E\n\x1fpending_membership_change_ind\ + ex\x18\x05\x20\x01(\x04R\x1cpendingMembershipChangeIndex\x12\x14\n\x05in\ + dex\x18\x02\x20\x01(\x04R\x05index\x12\x12\n\x04term\x18\x03\x20\x01(\ + 
\x04R\x04term\"U\n\x08Snapshot\x12\x12\n\x04data\x18\x01\x20\x01(\x0cR\ + \x04data\x125\n\x08metadata\x18\x02\x20\x01(\x0b2\x19.eraftpb.SnapshotMe\ + tadataR\x08metadata\"\xe7\x02\n\x07Message\x12/\n\x08msg_type\x18\x01\ + \x20\x01(\x0e2\x14.eraftpb.MessageTypeR\x07msgType\x12\x0e\n\x02to\x18\ + \x02\x20\x01(\x04R\x02to\x12\x12\n\x04from\x18\x03\x20\x01(\x04R\x04from\ + \x12\x12\n\x04term\x18\x04\x20\x01(\x04R\x04term\x12\x19\n\x08log_term\ + \x18\x05\x20\x01(\x04R\x07logTerm\x12\x14\n\x05index\x18\x06\x20\x01(\ \x04R\x05index\x12(\n\x07entries\x18\x07\x20\x03(\x0b2\x0e.eraftpb.Entry\ R\x07entries\x12\x16\n\x06commit\x18\x08\x20\x01(\x04R\x06commit\x12-\n\ \x08snapshot\x18\t\x20\x01(\x0b2\x11.eraftpb.SnapshotR\x08snapshot\x12\ @@ -2347,11 +2540,13 @@ static file_descriptor_proto_data: &'static [u8] = b"\ \x04term\x12\x12\n\x04vote\x18\x02\x20\x01(\x04R\x04vote\x12\x16\n\x06co\ mmit\x18\x03\x20\x01(\x04R\x06commit\"=\n\tConfState\x12\x14\n\x05nodes\ \x18\x01\x20\x03(\x04R\x05nodes\x12\x1a\n\x08learners\x18\x02\x20\x03(\ - \x04R\x08learners\"\x89\x01\n\nConfChange\x12\x0e\n\x02id\x18\x01\x20\ + \x04R\x08learners\"\xe4\x01\n\nConfChange\x12\x0e\n\x02id\x18\x01\x20\ \x01(\x04R\x02id\x128\n\x0bchange_type\x18\x02\x20\x01(\x0e2\x17.eraftpb\ .ConfChangeTypeR\nchangeType\x12\x17\n\x07node_id\x18\x03\x20\x01(\x04R\ - \x06nodeId\x12\x18\n\x07context\x18\x04\x20\x01(\x0cR\x07context*1\n\tEn\ - tryType\x12\x0f\n\x0bEntryNormal\x10\0\x12\x13\n\x0fEntryConfChange\x10\ + \x06nodeId\x12\x18\n\x07context\x18\x04\x20\x01(\x0cR\x07context\x128\n\ + \rconfiguration\x18\x05\x20\x01(\x0b2\x12.eraftpb.ConfStateR\rconfigurat\ + ion\x12\x1f\n\x0bstart_index\x18\x06\x20\x01(\x04R\nstartIndex*1\n\tEntr\ + yType\x12\x0f\n\x0bEntryNormal\x10\0\x12\x13\n\x0fEntryConfChange\x10\ \x01*\x8c\x03\n\x0bMessageType\x12\n\n\x06MsgHup\x10\0\x12\x0b\n\x07MsgB\ eat\x10\x01\x12\x0e\n\nMsgPropose\x10\x02\x12\r\n\tMsgAppend\x10\x03\x12\ \x15\n\x11MsgAppendResponse\x10\x04\x12\x12\n\x0eMsgRequestVote\x10\x05\ 
@@ -2361,191 +2556,216 @@ static file_descriptor_proto_data: &'static [u8] = b"\ \x10\x0b\x12\x12\n\x0eMsgCheckQuorum\x10\x0c\x12\x15\n\x11MsgTransferLea\ der\x10\r\x12\x11\n\rMsgTimeoutNow\x10\x0e\x12\x10\n\x0cMsgReadIndex\x10\ \x0f\x12\x14\n\x10MsgReadIndexResp\x10\x10\x12\x15\n\x11MsgRequestPreVot\ - e\x10\x11\x12\x1d\n\x19MsgRequestPreVoteResponse\x10\x12*A\n\x0eConfChan\ + e\x10\x11\x12\x1d\n\x19MsgRequestPreVoteResponse\x10\x12*z\n\x0eConfChan\ geType\x12\x0b\n\x07AddNode\x10\0\x12\x0e\n\nRemoveNode\x10\x01\x12\x12\ - \n\x0eAddLearnerNode\x10\x02J\xd7\x20\n\x06\x12\x04\0\0d\x01\n\x08\n\x01\ - \x0c\x12\x03\0\0\x12\n\x08\n\x01\x02\x12\x03\x01\x08\x0f\n\n\n\x02\x05\0\ - \x12\x04\x03\0\x06\x01\n\n\n\x03\x05\0\x01\x12\x03\x03\x05\x0e\n\x0b\n\ - \x04\x05\0\x02\0\x12\x03\x04\x04\x14\n\x0c\n\x05\x05\0\x02\0\x01\x12\x03\ - \x04\x04\x0f\n\x0c\n\x05\x05\0\x02\0\x02\x12\x03\x04\x12\x13\n\x0b\n\x04\ - \x05\0\x02\x01\x12\x03\x05\x04\x18\n\x0c\n\x05\x05\0\x02\x01\x01\x12\x03\ - \x05\x04\x13\n\x0c\n\x05\x05\0\x02\x01\x02\x12\x03\x05\x16\x17\n\xdd\x04\ - \n\x02\x04\0\x12\x04\x12\0\x1c\x01\x1a\xd0\x04\x20The\x20entry\x20is\x20\ - a\x20type\x20of\x20change\x20that\x20needs\x20to\x20be\x20applied.\x20It\ - \x20contains\x20two\x20data\x20fields.\n\x20While\x20the\x20fields\x20ar\ - e\x20built\x20into\x20the\x20model;\x20their\x20usage\x20is\x20determine\ - d\x20by\x20the\x20entry_type.\n\n\x20For\x20normal\x20entries,\x20the\ - \x20data\x20field\x20should\x20contain\x20the\x20data\x20change\x20that\ - \x20should\x20be\x20applied.\n\x20The\x20context\x20field\x20can\x20be\ - \x20used\x20for\x20any\x20contextual\x20data\x20that\x20might\x20be\x20r\ - elevant\x20to\x20the\n\x20application\x20of\x20the\x20data.\n\n\x20For\ - \x20configuration\x20changes,\x20the\x20data\x20will\x20contain\x20the\ - \x20ConfChange\x20message\x20and\x20the\n\x20context\x20will\x20provide\ - \x20anything\x20needed\x20to\x20assist\x20the\x20configuration\x20change\ - 
.\x20The\x20context\n\x20if\x20for\x20the\x20user\x20to\x20set\x20and\ - \x20use\x20in\x20this\x20case.\n\n\n\n\x03\x04\0\x01\x12\x03\x12\x08\r\n\ - \x0b\n\x04\x04\0\x02\0\x12\x03\x13\x04\x1d\n\r\n\x05\x04\0\x02\0\x04\x12\ - \x04\x13\x04\x12\x0f\n\x0c\n\x05\x04\0\x02\0\x06\x12\x03\x13\x04\r\n\x0c\ - \n\x05\x04\0\x02\0\x01\x12\x03\x13\x0e\x18\n\x0c\n\x05\x04\0\x02\0\x03\ - \x12\x03\x13\x1b\x1c\n\x0b\n\x04\x04\0\x02\x01\x12\x03\x14\x04\x14\n\r\n\ - \x05\x04\0\x02\x01\x04\x12\x04\x14\x04\x13\x1d\n\x0c\n\x05\x04\0\x02\x01\ - \x05\x12\x03\x14\x04\n\n\x0c\n\x05\x04\0\x02\x01\x01\x12\x03\x14\x0b\x0f\ - \n\x0c\n\x05\x04\0\x02\x01\x03\x12\x03\x14\x12\x13\n\x0b\n\x04\x04\0\x02\ - \x02\x12\x03\x15\x04\x15\n\r\n\x05\x04\0\x02\x02\x04\x12\x04\x15\x04\x14\ - \x14\n\x0c\n\x05\x04\0\x02\x02\x05\x12\x03\x15\x04\n\n\x0c\n\x05\x04\0\ - \x02\x02\x01\x12\x03\x15\x0b\x10\n\x0c\n\x05\x04\0\x02\x02\x03\x12\x03\ - \x15\x13\x14\n\x0b\n\x04\x04\0\x02\x03\x12\x03\x16\x04\x13\n\r\n\x05\x04\ - \0\x02\x03\x04\x12\x04\x16\x04\x15\x15\n\x0c\n\x05\x04\0\x02\x03\x05\x12\ - \x03\x16\x04\t\n\x0c\n\x05\x04\0\x02\x03\x01\x12\x03\x16\n\x0e\n\x0c\n\ - \x05\x04\0\x02\x03\x03\x12\x03\x16\x11\x12\n\x0b\n\x04\x04\0\x02\x04\x12\ - \x03\x17\x04\x16\n\r\n\x05\x04\0\x02\x04\x04\x12\x04\x17\x04\x16\x13\n\ - \x0c\n\x05\x04\0\x02\x04\x05\x12\x03\x17\x04\t\n\x0c\n\x05\x04\0\x02\x04\ - \x01\x12\x03\x17\n\x11\n\x0c\n\x05\x04\0\x02\x04\x03\x12\x03\x17\x14\x15\ - \nm\n\x04\x04\0\x02\x05\x12\x03\x1b\x04\x16\x1a`\x20Deprecated!\x20It\ - \x20is\x20kept\x20for\x20backward\x20compatibility.\n\x20TODO:\x20remove\ - \x20it\x20in\x20the\x20next\x20major\x20release.\n\n\r\n\x05\x04\0\x02\ - \x05\x04\x12\x04\x1b\x04\x17\x16\n\x0c\n\x05\x04\0\x02\x05\x05\x12\x03\ - \x1b\x04\x08\n\x0c\n\x05\x04\0\x02\x05\x01\x12\x03\x1b\t\x11\n\x0c\n\x05\ - \x04\0\x02\x05\x03\x12\x03\x1b\x14\x15\n\n\n\x02\x04\x01\x12\x04\x1e\0\"\ - \x01\n\n\n\x03\x04\x01\x01\x12\x03\x1e\x08\x18\n\x0b\n\x04\x04\x01\x02\0\ - 
\x12\x03\x1f\x04\x1d\n\r\n\x05\x04\x01\x02\0\x04\x12\x04\x1f\x04\x1e\x1a\ - \n\x0c\n\x05\x04\x01\x02\0\x06\x12\x03\x1f\x04\r\n\x0c\n\x05\x04\x01\x02\ - \0\x01\x12\x03\x1f\x0e\x18\n\x0c\n\x05\x04\x01\x02\0\x03\x12\x03\x1f\x1b\ - \x1c\n\x0b\n\x04\x04\x01\x02\x01\x12\x03\x20\x04\x15\n\r\n\x05\x04\x01\ - \x02\x01\x04\x12\x04\x20\x04\x1f\x1d\n\x0c\n\x05\x04\x01\x02\x01\x05\x12\ - \x03\x20\x04\n\n\x0c\n\x05\x04\x01\x02\x01\x01\x12\x03\x20\x0b\x10\n\x0c\ - \n\x05\x04\x01\x02\x01\x03\x12\x03\x20\x13\x14\n\x0b\n\x04\x04\x01\x02\ - \x02\x12\x03!\x04\x14\n\r\n\x05\x04\x01\x02\x02\x04\x12\x04!\x04\x20\x15\ - \n\x0c\n\x05\x04\x01\x02\x02\x05\x12\x03!\x04\n\n\x0c\n\x05\x04\x01\x02\ - \x02\x01\x12\x03!\x0b\x0f\n\x0c\n\x05\x04\x01\x02\x02\x03\x12\x03!\x12\ - \x13\n\n\n\x02\x04\x02\x12\x04$\0'\x01\n\n\n\x03\x04\x02\x01\x12\x03$\ - \x08\x10\n\x0b\n\x04\x04\x02\x02\0\x12\x03%\x04\x13\n\r\n\x05\x04\x02\ - \x02\0\x04\x12\x04%\x04$\x12\n\x0c\n\x05\x04\x02\x02\0\x05\x12\x03%\x04\ - \t\n\x0c\n\x05\x04\x02\x02\0\x01\x12\x03%\n\x0e\n\x0c\n\x05\x04\x02\x02\ - \0\x03\x12\x03%\x11\x12\n\x0b\n\x04\x04\x02\x02\x01\x12\x03&\x04\"\n\r\n\ - \x05\x04\x02\x02\x01\x04\x12\x04&\x04%\x13\n\x0c\n\x05\x04\x02\x02\x01\ - \x06\x12\x03&\x04\x14\n\x0c\n\x05\x04\x02\x02\x01\x01\x12\x03&\x15\x1d\n\ - \x0c\n\x05\x04\x02\x02\x01\x03\x12\x03&\x20!\n\n\n\x02\x05\x01\x12\x04)\ - \0=\x01\n\n\n\x03\x05\x01\x01\x12\x03)\x05\x10\n\x0b\n\x04\x05\x01\x02\0\ - \x12\x03*\x04\x0f\n\x0c\n\x05\x05\x01\x02\0\x01\x12\x03*\x04\n\n\x0c\n\ - \x05\x05\x01\x02\0\x02\x12\x03*\r\x0e\n\x0b\n\x04\x05\x01\x02\x01\x12\ - \x03+\x04\x10\n\x0c\n\x05\x05\x01\x02\x01\x01\x12\x03+\x04\x0b\n\x0c\n\ - \x05\x05\x01\x02\x01\x02\x12\x03+\x0e\x0f\n\x0b\n\x04\x05\x01\x02\x02\ - \x12\x03,\x04\x13\n\x0c\n\x05\x05\x01\x02\x02\x01\x12\x03,\x04\x0e\n\x0c\ - \n\x05\x05\x01\x02\x02\x02\x12\x03,\x11\x12\n\x0b\n\x04\x05\x01\x02\x03\ - \x12\x03-\x04\x12\n\x0c\n\x05\x05\x01\x02\x03\x01\x12\x03-\x04\r\n\x0c\n\ - 
\x05\x05\x01\x02\x03\x02\x12\x03-\x10\x11\n\x0b\n\x04\x05\x01\x02\x04\ - \x12\x03.\x04\x1a\n\x0c\n\x05\x05\x01\x02\x04\x01\x12\x03.\x04\x15\n\x0c\ - \n\x05\x05\x01\x02\x04\x02\x12\x03.\x18\x19\n\x0b\n\x04\x05\x01\x02\x05\ - \x12\x03/\x04\x17\n\x0c\n\x05\x05\x01\x02\x05\x01\x12\x03/\x04\x12\n\x0c\ - \n\x05\x05\x01\x02\x05\x02\x12\x03/\x15\x16\n\x0b\n\x04\x05\x01\x02\x06\ - \x12\x030\x04\x1f\n\x0c\n\x05\x05\x01\x02\x06\x01\x12\x030\x04\x1a\n\x0c\ - \n\x05\x05\x01\x02\x06\x02\x12\x030\x1d\x1e\n\x0b\n\x04\x05\x01\x02\x07\ - \x12\x031\x04\x14\n\x0c\n\x05\x05\x01\x02\x07\x01\x12\x031\x04\x0f\n\x0c\ - \n\x05\x05\x01\x02\x07\x02\x12\x031\x12\x13\n\x0b\n\x04\x05\x01\x02\x08\ - \x12\x032\x04\x15\n\x0c\n\x05\x05\x01\x02\x08\x01\x12\x032\x04\x10\n\x0c\ - \n\x05\x05\x01\x02\x08\x02\x12\x032\x13\x14\n\x0b\n\x04\x05\x01\x02\t\ - \x12\x033\x04\x1d\n\x0c\n\x05\x05\x01\x02\t\x01\x12\x033\x04\x18\n\x0c\n\ - \x05\x05\x01\x02\t\x02\x12\x033\x1b\x1c\n\x0b\n\x04\x05\x01\x02\n\x12\ - \x034\x04\x18\n\x0c\n\x05\x05\x01\x02\n\x01\x12\x034\x04\x12\n\x0c\n\x05\ - \x05\x01\x02\n\x02\x12\x034\x15\x17\n\x0b\n\x04\x05\x01\x02\x0b\x12\x035\ - \x04\x17\n\x0c\n\x05\x05\x01\x02\x0b\x01\x12\x035\x04\x11\n\x0c\n\x05\ - \x05\x01\x02\x0b\x02\x12\x035\x14\x16\n\x0b\n\x04\x05\x01\x02\x0c\x12\ - \x036\x04\x18\n\x0c\n\x05\x05\x01\x02\x0c\x01\x12\x036\x04\x12\n\x0c\n\ - \x05\x05\x01\x02\x0c\x02\x12\x036\x15\x17\n\x0b\n\x04\x05\x01\x02\r\x12\ - \x037\x04\x1b\n\x0c\n\x05\x05\x01\x02\r\x01\x12\x037\x04\x15\n\x0c\n\x05\ - \x05\x01\x02\r\x02\x12\x037\x18\x1a\n\x0b\n\x04\x05\x01\x02\x0e\x12\x038\ - \x04\x17\n\x0c\n\x05\x05\x01\x02\x0e\x01\x12\x038\x04\x11\n\x0c\n\x05\ - \x05\x01\x02\x0e\x02\x12\x038\x14\x16\n\x0b\n\x04\x05\x01\x02\x0f\x12\ - \x039\x04\x16\n\x0c\n\x05\x05\x01\x02\x0f\x01\x12\x039\x04\x10\n\x0c\n\ - \x05\x05\x01\x02\x0f\x02\x12\x039\x13\x15\n\x0b\n\x04\x05\x01\x02\x10\ - \x12\x03:\x04\x1a\n\x0c\n\x05\x05\x01\x02\x10\x01\x12\x03:\x04\x14\n\x0c\ - 
\n\x05\x05\x01\x02\x10\x02\x12\x03:\x17\x19\n\x0b\n\x04\x05\x01\x02\x11\ - \x12\x03;\x04\x1b\n\x0c\n\x05\x05\x01\x02\x11\x01\x12\x03;\x04\x15\n\x0c\ - \n\x05\x05\x01\x02\x11\x02\x12\x03;\x18\x1a\n\x0b\n\x04\x05\x01\x02\x12\ - \x12\x03<\x04#\n\x0c\n\x05\x05\x01\x02\x12\x01\x12\x03<\x04\x1d\n\x0c\n\ - \x05\x05\x01\x02\x12\x02\x12\x03<\x20\"\n\n\n\x02\x04\x03\x12\x04?\0L\ - \x01\n\n\n\x03\x04\x03\x01\x12\x03?\x08\x0f\n\x0b\n\x04\x04\x03\x02\0\ - \x12\x03@\x04\x1d\n\r\n\x05\x04\x03\x02\0\x04\x12\x04@\x04?\x11\n\x0c\n\ - \x05\x04\x03\x02\0\x06\x12\x03@\x04\x0f\n\x0c\n\x05\x04\x03\x02\0\x01\ - \x12\x03@\x10\x18\n\x0c\n\x05\x04\x03\x02\0\x03\x12\x03@\x1b\x1c\n\x0b\n\ - \x04\x04\x03\x02\x01\x12\x03A\x04\x12\n\r\n\x05\x04\x03\x02\x01\x04\x12\ - \x04A\x04@\x1d\n\x0c\n\x05\x04\x03\x02\x01\x05\x12\x03A\x04\n\n\x0c\n\ - \x05\x04\x03\x02\x01\x01\x12\x03A\x0b\r\n\x0c\n\x05\x04\x03\x02\x01\x03\ - \x12\x03A\x10\x11\n\x0b\n\x04\x04\x03\x02\x02\x12\x03B\x04\x14\n\r\n\x05\ - \x04\x03\x02\x02\x04\x12\x04B\x04A\x12\n\x0c\n\x05\x04\x03\x02\x02\x05\ - \x12\x03B\x04\n\n\x0c\n\x05\x04\x03\x02\x02\x01\x12\x03B\x0b\x0f\n\x0c\n\ - \x05\x04\x03\x02\x02\x03\x12\x03B\x12\x13\n\x0b\n\x04\x04\x03\x02\x03\ - \x12\x03C\x04\x14\n\r\n\x05\x04\x03\x02\x03\x04\x12\x04C\x04B\x14\n\x0c\ - \n\x05\x04\x03\x02\x03\x05\x12\x03C\x04\n\n\x0c\n\x05\x04\x03\x02\x03\ - \x01\x12\x03C\x0b\x0f\n\x0c\n\x05\x04\x03\x02\x03\x03\x12\x03C\x12\x13\n\ - \x0b\n\x04\x04\x03\x02\x04\x12\x03D\x04\x18\n\r\n\x05\x04\x03\x02\x04\ - \x04\x12\x04D\x04C\x14\n\x0c\n\x05\x04\x03\x02\x04\x05\x12\x03D\x04\n\n\ - \x0c\n\x05\x04\x03\x02\x04\x01\x12\x03D\x0b\x13\n\x0c\n\x05\x04\x03\x02\ - \x04\x03\x12\x03D\x16\x17\n\x0b\n\x04\x04\x03\x02\x05\x12\x03E\x04\x15\n\ - \r\n\x05\x04\x03\x02\x05\x04\x12\x04E\x04D\x18\n\x0c\n\x05\x04\x03\x02\ - \x05\x05\x12\x03E\x04\n\n\x0c\n\x05\x04\x03\x02\x05\x01\x12\x03E\x0b\x10\ - \n\x0c\n\x05\x04\x03\x02\x05\x03\x12\x03E\x13\x14\n\x0b\n\x04\x04\x03\ - 
\x02\x06\x12\x03F\x04\x1f\n\x0c\n\x05\x04\x03\x02\x06\x04\x12\x03F\x04\ - \x0c\n\x0c\n\x05\x04\x03\x02\x06\x06\x12\x03F\r\x12\n\x0c\n\x05\x04\x03\ - \x02\x06\x01\x12\x03F\x13\x1a\n\x0c\n\x05\x04\x03\x02\x06\x03\x12\x03F\ - \x1d\x1e\n\x0b\n\x04\x04\x03\x02\x07\x12\x03G\x04\x16\n\r\n\x05\x04\x03\ - \x02\x07\x04\x12\x04G\x04F\x1f\n\x0c\n\x05\x04\x03\x02\x07\x05\x12\x03G\ - \x04\n\n\x0c\n\x05\x04\x03\x02\x07\x01\x12\x03G\x0b\x11\n\x0c\n\x05\x04\ - \x03\x02\x07\x03\x12\x03G\x14\x15\n\x0b\n\x04\x04\x03\x02\x08\x12\x03H\ - \x04\x1a\n\r\n\x05\x04\x03\x02\x08\x04\x12\x04H\x04G\x16\n\x0c\n\x05\x04\ - \x03\x02\x08\x06\x12\x03H\x04\x0c\n\x0c\n\x05\x04\x03\x02\x08\x01\x12\ - \x03H\r\x15\n\x0c\n\x05\x04\x03\x02\x08\x03\x12\x03H\x18\x19\n\x0b\n\x04\ - \x04\x03\x02\t\x12\x03I\x04\x15\n\r\n\x05\x04\x03\x02\t\x04\x12\x04I\x04\ - H\x1a\n\x0c\n\x05\x04\x03\x02\t\x05\x12\x03I\x04\x08\n\x0c\n\x05\x04\x03\ - \x02\t\x01\x12\x03I\t\x0f\n\x0c\n\x05\x04\x03\x02\t\x03\x12\x03I\x12\x14\ - \n\x0b\n\x04\x04\x03\x02\n\x12\x03J\x04\x1c\n\r\n\x05\x04\x03\x02\n\x04\ - \x12\x04J\x04I\x15\n\x0c\n\x05\x04\x03\x02\n\x05\x12\x03J\x04\n\n\x0c\n\ - \x05\x04\x03\x02\n\x01\x12\x03J\x0b\x16\n\x0c\n\x05\x04\x03\x02\n\x03\ - \x12\x03J\x19\x1b\n\x0b\n\x04\x04\x03\x02\x0b\x12\x03K\x04\x17\n\r\n\x05\ - \x04\x03\x02\x0b\x04\x12\x04K\x04J\x1c\n\x0c\n\x05\x04\x03\x02\x0b\x05\ - \x12\x03K\x04\t\n\x0c\n\x05\x04\x03\x02\x0b\x01\x12\x03K\n\x11\n\x0c\n\ - \x05\x04\x03\x02\x0b\x03\x12\x03K\x14\x16\n\n\n\x02\x04\x04\x12\x04N\0R\ - \x01\n\n\n\x03\x04\x04\x01\x12\x03N\x08\x11\n\x0b\n\x04\x04\x04\x02\0\ - \x12\x03O\x04\x14\n\r\n\x05\x04\x04\x02\0\x04\x12\x04O\x04N\x13\n\x0c\n\ - \x05\x04\x04\x02\0\x05\x12\x03O\x04\n\n\x0c\n\x05\x04\x04\x02\0\x01\x12\ - \x03O\x0b\x0f\n\x0c\n\x05\x04\x04\x02\0\x03\x12\x03O\x12\x13\n\x0b\n\x04\ - \x04\x04\x02\x01\x12\x03P\x04\x14\n\r\n\x05\x04\x04\x02\x01\x04\x12\x04P\ - \x04O\x14\n\x0c\n\x05\x04\x04\x02\x01\x05\x12\x03P\x04\n\n\x0c\n\x05\x04\ - 
\x04\x02\x01\x01\x12\x03P\x0b\x0f\n\x0c\n\x05\x04\x04\x02\x01\x03\x12\ - \x03P\x12\x13\n\x0b\n\x04\x04\x04\x02\x02\x12\x03Q\x04\x16\n\r\n\x05\x04\ - \x04\x02\x02\x04\x12\x04Q\x04P\x14\n\x0c\n\x05\x04\x04\x02\x02\x05\x12\ - \x03Q\x04\n\n\x0c\n\x05\x04\x04\x02\x02\x01\x12\x03Q\x0b\x11\n\x0c\n\x05\ - \x04\x04\x02\x02\x03\x12\x03Q\x14\x15\n\n\n\x02\x04\x05\x12\x04T\0W\x01\ - \n\n\n\x03\x04\x05\x01\x12\x03T\x08\x11\n\x0b\n\x04\x04\x05\x02\0\x12\ - \x03U\x04\x1e\n\x0c\n\x05\x04\x05\x02\0\x04\x12\x03U\x04\x0c\n\x0c\n\x05\ - \x04\x05\x02\0\x05\x12\x03U\r\x13\n\x0c\n\x05\x04\x05\x02\0\x01\x12\x03U\ - \x14\x19\n\x0c\n\x05\x04\x05\x02\0\x03\x12\x03U\x1c\x1d\n\x0b\n\x04\x04\ - \x05\x02\x01\x12\x03V\x04!\n\x0c\n\x05\x04\x05\x02\x01\x04\x12\x03V\x04\ - \x0c\n\x0c\n\x05\x04\x05\x02\x01\x05\x12\x03V\r\x13\n\x0c\n\x05\x04\x05\ - \x02\x01\x01\x12\x03V\x14\x1c\n\x0c\n\x05\x04\x05\x02\x01\x03\x12\x03V\ - \x1f\x20\n\n\n\x02\x05\x02\x12\x04Y\0]\x01\n\n\n\x03\x05\x02\x01\x12\x03\ - Y\x05\x13\n\x0b\n\x04\x05\x02\x02\0\x12\x03Z\x04\x13\n\x0c\n\x05\x05\x02\ - \x02\0\x01\x12\x03Z\x04\x0b\n\x0c\n\x05\x05\x02\x02\0\x02\x12\x03Z\x11\ - \x12\n\x0b\n\x04\x05\x02\x02\x01\x12\x03[\x04\x13\n\x0c\n\x05\x05\x02\ - \x02\x01\x01\x12\x03[\x04\x0e\n\x0c\n\x05\x05\x02\x02\x01\x02\x12\x03[\ - \x11\x12\n\x0b\n\x04\x05\x02\x02\x02\x12\x03\\\x04\x17\n\x0c\n\x05\x05\ - \x02\x02\x02\x01\x12\x03\\\x04\x12\n\x0c\n\x05\x05\x02\x02\x02\x02\x12\ - \x03\\\x15\x16\n\n\n\x02\x04\x06\x12\x04_\0d\x01\n\n\n\x03\x04\x06\x01\ - \x12\x03_\x08\x12\n\x0b\n\x04\x04\x06\x02\0\x12\x03`\x04\x12\n\r\n\x05\ - \x04\x06\x02\0\x04\x12\x04`\x04_\x14\n\x0c\n\x05\x04\x06\x02\0\x05\x12\ - \x03`\x04\n\n\x0c\n\x05\x04\x06\x02\0\x01\x12\x03`\x0b\r\n\x0c\n\x05\x04\ - \x06\x02\0\x03\x12\x03`\x10\x11\n\x0b\n\x04\x04\x06\x02\x01\x12\x03a\x04\ - #\n\r\n\x05\x04\x06\x02\x01\x04\x12\x04a\x04`\x12\n\x0c\n\x05\x04\x06\ - \x02\x01\x06\x12\x03a\x04\x12\n\x0c\n\x05\x04\x06\x02\x01\x01\x12\x03a\ - 
\x13\x1e\n\x0c\n\x05\x04\x06\x02\x01\x03\x12\x03a!\"\n\x0b\n\x04\x04\x06\ - \x02\x02\x12\x03b\x04\x17\n\r\n\x05\x04\x06\x02\x02\x04\x12\x04b\x04a#\n\ - \x0c\n\x05\x04\x06\x02\x02\x05\x12\x03b\x04\n\n\x0c\n\x05\x04\x06\x02\ - \x02\x01\x12\x03b\x0b\x12\n\x0c\n\x05\x04\x06\x02\x02\x03\x12\x03b\x15\ - \x16\n\x0b\n\x04\x04\x06\x02\x03\x12\x03c\x04\x16\n\r\n\x05\x04\x06\x02\ - \x03\x04\x12\x04c\x04b\x17\n\x0c\n\x05\x04\x06\x02\x03\x05\x12\x03c\x04\ - \t\n\x0c\n\x05\x04\x06\x02\x03\x01\x12\x03c\n\x11\n\x0c\n\x05\x04\x06\ - \x02\x03\x03\x12\x03c\x14\x15b\x06proto3\ + \n\x0eAddLearnerNode\x10\x02\x12\x19\n\x15BeginMembershipChange\x10\x03\ + \x12\x1c\n\x18FinalizeMembershipChange\x10\x04J\x84&\n\x06\x12\x04\0\0o\ + \x01\n\x08\n\x01\x0c\x12\x03\0\0\x12\n\x08\n\x01\x02\x12\x03\x01\x08\x0f\ + \n\n\n\x02\x05\0\x12\x04\x03\0\x06\x01\n\n\n\x03\x05\0\x01\x12\x03\x03\ + \x05\x0e\n\x0b\n\x04\x05\0\x02\0\x12\x03\x04\x04\x14\n\x0c\n\x05\x05\0\ + \x02\0\x01\x12\x03\x04\x04\x0f\n\x0c\n\x05\x05\0\x02\0\x02\x12\x03\x04\ + \x12\x13\n\x0b\n\x04\x05\0\x02\x01\x12\x03\x05\x04\x18\n\x0c\n\x05\x05\0\ + \x02\x01\x01\x12\x03\x05\x04\x13\n\x0c\n\x05\x05\0\x02\x01\x02\x12\x03\ + \x05\x16\x17\n\xdd\x04\n\x02\x04\0\x12\x04\x12\0\x1c\x01\x1a\xd0\x04\x20\ + The\x20entry\x20is\x20a\x20type\x20of\x20change\x20that\x20needs\x20to\ + \x20be\x20applied.\x20It\x20contains\x20two\x20data\x20fields.\n\x20Whil\ + e\x20the\x20fields\x20are\x20built\x20into\x20the\x20model;\x20their\x20\ + usage\x20is\x20determined\x20by\x20the\x20entry_type.\n\n\x20For\x20norm\ + al\x20entries,\x20the\x20data\x20field\x20should\x20contain\x20the\x20da\ + ta\x20change\x20that\x20should\x20be\x20applied.\n\x20The\x20context\x20\ + field\x20can\x20be\x20used\x20for\x20any\x20contextual\x20data\x20that\ + \x20might\x20be\x20relevant\x20to\x20the\n\x20application\x20of\x20the\ + \x20data.\n\n\x20For\x20configuration\x20changes,\x20the\x20data\x20will\ + \x20contain\x20the\x20ConfChange\x20message\x20and\x20the\n\x20context\ + 
\x20will\x20provide\x20anything\x20needed\x20to\x20assist\x20the\x20conf\ + iguration\x20change.\x20The\x20context\n\x20if\x20for\x20the\x20user\x20\ + to\x20set\x20and\x20use\x20in\x20this\x20case.\n\n\n\n\x03\x04\0\x01\x12\ + \x03\x12\x08\r\n\x0b\n\x04\x04\0\x02\0\x12\x03\x13\x04\x1d\n\r\n\x05\x04\ + \0\x02\0\x04\x12\x04\x13\x04\x12\x0f\n\x0c\n\x05\x04\0\x02\0\x06\x12\x03\ + \x13\x04\r\n\x0c\n\x05\x04\0\x02\0\x01\x12\x03\x13\x0e\x18\n\x0c\n\x05\ + \x04\0\x02\0\x03\x12\x03\x13\x1b\x1c\n\x0b\n\x04\x04\0\x02\x01\x12\x03\ + \x14\x04\x14\n\r\n\x05\x04\0\x02\x01\x04\x12\x04\x14\x04\x13\x1d\n\x0c\n\ + \x05\x04\0\x02\x01\x05\x12\x03\x14\x04\n\n\x0c\n\x05\x04\0\x02\x01\x01\ + \x12\x03\x14\x0b\x0f\n\x0c\n\x05\x04\0\x02\x01\x03\x12\x03\x14\x12\x13\n\ + \x0b\n\x04\x04\0\x02\x02\x12\x03\x15\x04\x15\n\r\n\x05\x04\0\x02\x02\x04\ + \x12\x04\x15\x04\x14\x14\n\x0c\n\x05\x04\0\x02\x02\x05\x12\x03\x15\x04\n\ + \n\x0c\n\x05\x04\0\x02\x02\x01\x12\x03\x15\x0b\x10\n\x0c\n\x05\x04\0\x02\ + \x02\x03\x12\x03\x15\x13\x14\n\x0b\n\x04\x04\0\x02\x03\x12\x03\x16\x04\ + \x13\n\r\n\x05\x04\0\x02\x03\x04\x12\x04\x16\x04\x15\x15\n\x0c\n\x05\x04\ + \0\x02\x03\x05\x12\x03\x16\x04\t\n\x0c\n\x05\x04\0\x02\x03\x01\x12\x03\ + \x16\n\x0e\n\x0c\n\x05\x04\0\x02\x03\x03\x12\x03\x16\x11\x12\n\x0b\n\x04\ + \x04\0\x02\x04\x12\x03\x17\x04\x16\n\r\n\x05\x04\0\x02\x04\x04\x12\x04\ + \x17\x04\x16\x13\n\x0c\n\x05\x04\0\x02\x04\x05\x12\x03\x17\x04\t\n\x0c\n\ + \x05\x04\0\x02\x04\x01\x12\x03\x17\n\x11\n\x0c\n\x05\x04\0\x02\x04\x03\ + \x12\x03\x17\x14\x15\nm\n\x04\x04\0\x02\x05\x12\x03\x1b\x04\x16\x1a`\x20\ + Deprecated!\x20It\x20is\x20kept\x20for\x20backward\x20compatibility.\n\ + \x20TODO:\x20remove\x20it\x20in\x20the\x20next\x20major\x20release.\n\n\ + \r\n\x05\x04\0\x02\x05\x04\x12\x04\x1b\x04\x17\x16\n\x0c\n\x05\x04\0\x02\ + \x05\x05\x12\x03\x1b\x04\x08\n\x0c\n\x05\x04\0\x02\x05\x01\x12\x03\x1b\t\ + \x11\n\x0c\n\x05\x04\0\x02\x05\x03\x12\x03\x1b\x14\x15\n\n\n\x02\x04\x01\ + 
\x12\x04\x1e\0$\x01\n\n\n\x03\x04\x01\x01\x12\x03\x1e\x08\x18\n\x0b\n\ + \x04\x04\x01\x02\0\x12\x03\x1f\x04\x1d\n\r\n\x05\x04\x01\x02\0\x04\x12\ + \x04\x1f\x04\x1e\x1a\n\x0c\n\x05\x04\x01\x02\0\x06\x12\x03\x1f\x04\r\n\ + \x0c\n\x05\x04\x01\x02\0\x01\x12\x03\x1f\x0e\x18\n\x0c\n\x05\x04\x01\x02\ + \0\x03\x12\x03\x1f\x1b\x1c\n\x0b\n\x04\x04\x01\x02\x01\x12\x03\x20\x04,\ + \n\r\n\x05\x04\x01\x02\x01\x04\x12\x04\x20\x04\x1f\x1d\n\x0c\n\x05\x04\ + \x01\x02\x01\x06\x12\x03\x20\x04\r\n\x0c\n\x05\x04\x01\x02\x01\x01\x12\ + \x03\x20\x0e'\n\x0c\n\x05\x04\x01\x02\x01\x03\x12\x03\x20*+\n\x0b\n\x04\ + \x04\x01\x02\x02\x12\x03!\x04/\n\r\n\x05\x04\x01\x02\x02\x04\x12\x04!\ + \x04\x20,\n\x0c\n\x05\x04\x01\x02\x02\x05\x12\x03!\x04\n\n\x0c\n\x05\x04\ + \x01\x02\x02\x01\x12\x03!\x0b*\n\x0c\n\x05\x04\x01\x02\x02\x03\x12\x03!-\ + .\n\x0b\n\x04\x04\x01\x02\x03\x12\x03\"\x04\x15\n\r\n\x05\x04\x01\x02\ + \x03\x04\x12\x04\"\x04!/\n\x0c\n\x05\x04\x01\x02\x03\x05\x12\x03\"\x04\n\ + \n\x0c\n\x05\x04\x01\x02\x03\x01\x12\x03\"\x0b\x10\n\x0c\n\x05\x04\x01\ + \x02\x03\x03\x12\x03\"\x13\x14\n\x0b\n\x04\x04\x01\x02\x04\x12\x03#\x04\ + \x14\n\r\n\x05\x04\x01\x02\x04\x04\x12\x04#\x04\"\x15\n\x0c\n\x05\x04\ + \x01\x02\x04\x05\x12\x03#\x04\n\n\x0c\n\x05\x04\x01\x02\x04\x01\x12\x03#\ + \x0b\x0f\n\x0c\n\x05\x04\x01\x02\x04\x03\x12\x03#\x12\x13\n\n\n\x02\x04\ + \x02\x12\x04&\0)\x01\n\n\n\x03\x04\x02\x01\x12\x03&\x08\x10\n\x0b\n\x04\ + \x04\x02\x02\0\x12\x03'\x04\x13\n\r\n\x05\x04\x02\x02\0\x04\x12\x04'\x04\ + &\x12\n\x0c\n\x05\x04\x02\x02\0\x05\x12\x03'\x04\t\n\x0c\n\x05\x04\x02\ + \x02\0\x01\x12\x03'\n\x0e\n\x0c\n\x05\x04\x02\x02\0\x03\x12\x03'\x11\x12\ + \n\x0b\n\x04\x04\x02\x02\x01\x12\x03(\x04\"\n\r\n\x05\x04\x02\x02\x01\ + \x04\x12\x04(\x04'\x13\n\x0c\n\x05\x04\x02\x02\x01\x06\x12\x03(\x04\x14\ + \n\x0c\n\x05\x04\x02\x02\x01\x01\x12\x03(\x15\x1d\n\x0c\n\x05\x04\x02\ + \x02\x01\x03\x12\x03(\x20!\n\n\n\x02\x05\x01\x12\x04+\0?\x01\n\n\n\x03\ + 
\x05\x01\x01\x12\x03+\x05\x10\n\x0b\n\x04\x05\x01\x02\0\x12\x03,\x04\x0f\ + \n\x0c\n\x05\x05\x01\x02\0\x01\x12\x03,\x04\n\n\x0c\n\x05\x05\x01\x02\0\ + \x02\x12\x03,\r\x0e\n\x0b\n\x04\x05\x01\x02\x01\x12\x03-\x04\x10\n\x0c\n\ + \x05\x05\x01\x02\x01\x01\x12\x03-\x04\x0b\n\x0c\n\x05\x05\x01\x02\x01\ + \x02\x12\x03-\x0e\x0f\n\x0b\n\x04\x05\x01\x02\x02\x12\x03.\x04\x13\n\x0c\ + \n\x05\x05\x01\x02\x02\x01\x12\x03.\x04\x0e\n\x0c\n\x05\x05\x01\x02\x02\ + \x02\x12\x03.\x11\x12\n\x0b\n\x04\x05\x01\x02\x03\x12\x03/\x04\x12\n\x0c\ + \n\x05\x05\x01\x02\x03\x01\x12\x03/\x04\r\n\x0c\n\x05\x05\x01\x02\x03\ + \x02\x12\x03/\x10\x11\n\x0b\n\x04\x05\x01\x02\x04\x12\x030\x04\x1a\n\x0c\ + \n\x05\x05\x01\x02\x04\x01\x12\x030\x04\x15\n\x0c\n\x05\x05\x01\x02\x04\ + \x02\x12\x030\x18\x19\n\x0b\n\x04\x05\x01\x02\x05\x12\x031\x04\x17\n\x0c\ + \n\x05\x05\x01\x02\x05\x01\x12\x031\x04\x12\n\x0c\n\x05\x05\x01\x02\x05\ + \x02\x12\x031\x15\x16\n\x0b\n\x04\x05\x01\x02\x06\x12\x032\x04\x1f\n\x0c\ + \n\x05\x05\x01\x02\x06\x01\x12\x032\x04\x1a\n\x0c\n\x05\x05\x01\x02\x06\ + \x02\x12\x032\x1d\x1e\n\x0b\n\x04\x05\x01\x02\x07\x12\x033\x04\x14\n\x0c\ + \n\x05\x05\x01\x02\x07\x01\x12\x033\x04\x0f\n\x0c\n\x05\x05\x01\x02\x07\ + \x02\x12\x033\x12\x13\n\x0b\n\x04\x05\x01\x02\x08\x12\x034\x04\x15\n\x0c\ + \n\x05\x05\x01\x02\x08\x01\x12\x034\x04\x10\n\x0c\n\x05\x05\x01\x02\x08\ + \x02\x12\x034\x13\x14\n\x0b\n\x04\x05\x01\x02\t\x12\x035\x04\x1d\n\x0c\n\ + \x05\x05\x01\x02\t\x01\x12\x035\x04\x18\n\x0c\n\x05\x05\x01\x02\t\x02\ + \x12\x035\x1b\x1c\n\x0b\n\x04\x05\x01\x02\n\x12\x036\x04\x18\n\x0c\n\x05\ + \x05\x01\x02\n\x01\x12\x036\x04\x12\n\x0c\n\x05\x05\x01\x02\n\x02\x12\ + \x036\x15\x17\n\x0b\n\x04\x05\x01\x02\x0b\x12\x037\x04\x17\n\x0c\n\x05\ + \x05\x01\x02\x0b\x01\x12\x037\x04\x11\n\x0c\n\x05\x05\x01\x02\x0b\x02\ + \x12\x037\x14\x16\n\x0b\n\x04\x05\x01\x02\x0c\x12\x038\x04\x18\n\x0c\n\ + \x05\x05\x01\x02\x0c\x01\x12\x038\x04\x12\n\x0c\n\x05\x05\x01\x02\x0c\ + 
\x02\x12\x038\x15\x17\n\x0b\n\x04\x05\x01\x02\r\x12\x039\x04\x1b\n\x0c\n\ + \x05\x05\x01\x02\r\x01\x12\x039\x04\x15\n\x0c\n\x05\x05\x01\x02\r\x02\ + \x12\x039\x18\x1a\n\x0b\n\x04\x05\x01\x02\x0e\x12\x03:\x04\x17\n\x0c\n\ + \x05\x05\x01\x02\x0e\x01\x12\x03:\x04\x11\n\x0c\n\x05\x05\x01\x02\x0e\ + \x02\x12\x03:\x14\x16\n\x0b\n\x04\x05\x01\x02\x0f\x12\x03;\x04\x16\n\x0c\ + \n\x05\x05\x01\x02\x0f\x01\x12\x03;\x04\x10\n\x0c\n\x05\x05\x01\x02\x0f\ + \x02\x12\x03;\x13\x15\n\x0b\n\x04\x05\x01\x02\x10\x12\x03<\x04\x1a\n\x0c\ + \n\x05\x05\x01\x02\x10\x01\x12\x03<\x04\x14\n\x0c\n\x05\x05\x01\x02\x10\ + \x02\x12\x03<\x17\x19\n\x0b\n\x04\x05\x01\x02\x11\x12\x03=\x04\x1b\n\x0c\ + \n\x05\x05\x01\x02\x11\x01\x12\x03=\x04\x15\n\x0c\n\x05\x05\x01\x02\x11\ + \x02\x12\x03=\x18\x1a\n\x0b\n\x04\x05\x01\x02\x12\x12\x03>\x04#\n\x0c\n\ + \x05\x05\x01\x02\x12\x01\x12\x03>\x04\x1d\n\x0c\n\x05\x05\x01\x02\x12\ + \x02\x12\x03>\x20\"\n\n\n\x02\x04\x03\x12\x04A\0N\x01\n\n\n\x03\x04\x03\ + \x01\x12\x03A\x08\x0f\n\x0b\n\x04\x04\x03\x02\0\x12\x03B\x04\x1d\n\r\n\ + \x05\x04\x03\x02\0\x04\x12\x04B\x04A\x11\n\x0c\n\x05\x04\x03\x02\0\x06\ + \x12\x03B\x04\x0f\n\x0c\n\x05\x04\x03\x02\0\x01\x12\x03B\x10\x18\n\x0c\n\ + \x05\x04\x03\x02\0\x03\x12\x03B\x1b\x1c\n\x0b\n\x04\x04\x03\x02\x01\x12\ + \x03C\x04\x12\n\r\n\x05\x04\x03\x02\x01\x04\x12\x04C\x04B\x1d\n\x0c\n\ + \x05\x04\x03\x02\x01\x05\x12\x03C\x04\n\n\x0c\n\x05\x04\x03\x02\x01\x01\ + \x12\x03C\x0b\r\n\x0c\n\x05\x04\x03\x02\x01\x03\x12\x03C\x10\x11\n\x0b\n\ + \x04\x04\x03\x02\x02\x12\x03D\x04\x14\n\r\n\x05\x04\x03\x02\x02\x04\x12\ + \x04D\x04C\x12\n\x0c\n\x05\x04\x03\x02\x02\x05\x12\x03D\x04\n\n\x0c\n\ + \x05\x04\x03\x02\x02\x01\x12\x03D\x0b\x0f\n\x0c\n\x05\x04\x03\x02\x02\ + \x03\x12\x03D\x12\x13\n\x0b\n\x04\x04\x03\x02\x03\x12\x03E\x04\x14\n\r\n\ + \x05\x04\x03\x02\x03\x04\x12\x04E\x04D\x14\n\x0c\n\x05\x04\x03\x02\x03\ + \x05\x12\x03E\x04\n\n\x0c\n\x05\x04\x03\x02\x03\x01\x12\x03E\x0b\x0f\n\ + 
\x0c\n\x05\x04\x03\x02\x03\x03\x12\x03E\x12\x13\n\x0b\n\x04\x04\x03\x02\ + \x04\x12\x03F\x04\x18\n\r\n\x05\x04\x03\x02\x04\x04\x12\x04F\x04E\x14\n\ + \x0c\n\x05\x04\x03\x02\x04\x05\x12\x03F\x04\n\n\x0c\n\x05\x04\x03\x02\ + \x04\x01\x12\x03F\x0b\x13\n\x0c\n\x05\x04\x03\x02\x04\x03\x12\x03F\x16\ + \x17\n\x0b\n\x04\x04\x03\x02\x05\x12\x03G\x04\x15\n\r\n\x05\x04\x03\x02\ + \x05\x04\x12\x04G\x04F\x18\n\x0c\n\x05\x04\x03\x02\x05\x05\x12\x03G\x04\ + \n\n\x0c\n\x05\x04\x03\x02\x05\x01\x12\x03G\x0b\x10\n\x0c\n\x05\x04\x03\ + \x02\x05\x03\x12\x03G\x13\x14\n\x0b\n\x04\x04\x03\x02\x06\x12\x03H\x04\ + \x1f\n\x0c\n\x05\x04\x03\x02\x06\x04\x12\x03H\x04\x0c\n\x0c\n\x05\x04\ + \x03\x02\x06\x06\x12\x03H\r\x12\n\x0c\n\x05\x04\x03\x02\x06\x01\x12\x03H\ + \x13\x1a\n\x0c\n\x05\x04\x03\x02\x06\x03\x12\x03H\x1d\x1e\n\x0b\n\x04\ + \x04\x03\x02\x07\x12\x03I\x04\x16\n\r\n\x05\x04\x03\x02\x07\x04\x12\x04I\ + \x04H\x1f\n\x0c\n\x05\x04\x03\x02\x07\x05\x12\x03I\x04\n\n\x0c\n\x05\x04\ + \x03\x02\x07\x01\x12\x03I\x0b\x11\n\x0c\n\x05\x04\x03\x02\x07\x03\x12\ + \x03I\x14\x15\n\x0b\n\x04\x04\x03\x02\x08\x12\x03J\x04\x1a\n\r\n\x05\x04\ + \x03\x02\x08\x04\x12\x04J\x04I\x16\n\x0c\n\x05\x04\x03\x02\x08\x06\x12\ + \x03J\x04\x0c\n\x0c\n\x05\x04\x03\x02\x08\x01\x12\x03J\r\x15\n\x0c\n\x05\ + \x04\x03\x02\x08\x03\x12\x03J\x18\x19\n\x0b\n\x04\x04\x03\x02\t\x12\x03K\ + \x04\x15\n\r\n\x05\x04\x03\x02\t\x04\x12\x04K\x04J\x1a\n\x0c\n\x05\x04\ + \x03\x02\t\x05\x12\x03K\x04\x08\n\x0c\n\x05\x04\x03\x02\t\x01\x12\x03K\t\ + \x0f\n\x0c\n\x05\x04\x03\x02\t\x03\x12\x03K\x12\x14\n\x0b\n\x04\x04\x03\ + \x02\n\x12\x03L\x04\x1c\n\r\n\x05\x04\x03\x02\n\x04\x12\x04L\x04K\x15\n\ + \x0c\n\x05\x04\x03\x02\n\x05\x12\x03L\x04\n\n\x0c\n\x05\x04\x03\x02\n\ + \x01\x12\x03L\x0b\x16\n\x0c\n\x05\x04\x03\x02\n\x03\x12\x03L\x19\x1b\n\ + \x0b\n\x04\x04\x03\x02\x0b\x12\x03M\x04\x17\n\r\n\x05\x04\x03\x02\x0b\ + \x04\x12\x04M\x04L\x1c\n\x0c\n\x05\x04\x03\x02\x0b\x05\x12\x03M\x04\t\n\ + 
\x0c\n\x05\x04\x03\x02\x0b\x01\x12\x03M\n\x11\n\x0c\n\x05\x04\x03\x02\ + \x0b\x03\x12\x03M\x14\x16\n\n\n\x02\x04\x04\x12\x04P\0T\x01\n\n\n\x03\ + \x04\x04\x01\x12\x03P\x08\x11\n\x0b\n\x04\x04\x04\x02\0\x12\x03Q\x04\x14\ + \n\r\n\x05\x04\x04\x02\0\x04\x12\x04Q\x04P\x13\n\x0c\n\x05\x04\x04\x02\0\ + \x05\x12\x03Q\x04\n\n\x0c\n\x05\x04\x04\x02\0\x01\x12\x03Q\x0b\x0f\n\x0c\ + \n\x05\x04\x04\x02\0\x03\x12\x03Q\x12\x13\n\x0b\n\x04\x04\x04\x02\x01\ + \x12\x03R\x04\x14\n\r\n\x05\x04\x04\x02\x01\x04\x12\x04R\x04Q\x14\n\x0c\ + \n\x05\x04\x04\x02\x01\x05\x12\x03R\x04\n\n\x0c\n\x05\x04\x04\x02\x01\ + \x01\x12\x03R\x0b\x0f\n\x0c\n\x05\x04\x04\x02\x01\x03\x12\x03R\x12\x13\n\ + \x0b\n\x04\x04\x04\x02\x02\x12\x03S\x04\x16\n\r\n\x05\x04\x04\x02\x02\ + \x04\x12\x04S\x04R\x14\n\x0c\n\x05\x04\x04\x02\x02\x05\x12\x03S\x04\n\n\ + \x0c\n\x05\x04\x04\x02\x02\x01\x12\x03S\x0b\x11\n\x0c\n\x05\x04\x04\x02\ + \x02\x03\x12\x03S\x14\x15\n\n\n\x02\x04\x05\x12\x04V\0Y\x01\n\n\n\x03\ + \x04\x05\x01\x12\x03V\x08\x11\n\x0b\n\x04\x04\x05\x02\0\x12\x03W\x04\x1e\ + \n\x0c\n\x05\x04\x05\x02\0\x04\x12\x03W\x04\x0c\n\x0c\n\x05\x04\x05\x02\ + \0\x05\x12\x03W\r\x13\n\x0c\n\x05\x04\x05\x02\0\x01\x12\x03W\x14\x19\n\ + \x0c\n\x05\x04\x05\x02\0\x03\x12\x03W\x1c\x1d\n\x0b\n\x04\x04\x05\x02\ + \x01\x12\x03X\x04!\n\x0c\n\x05\x04\x05\x02\x01\x04\x12\x03X\x04\x0c\n\ + \x0c\n\x05\x04\x05\x02\x01\x05\x12\x03X\r\x13\n\x0c\n\x05\x04\x05\x02\ + \x01\x01\x12\x03X\x14\x1c\n\x0c\n\x05\x04\x05\x02\x01\x03\x12\x03X\x1f\ + \x20\n\n\n\x02\x05\x02\x12\x04[\0a\x01\n\n\n\x03\x05\x02\x01\x12\x03[\ + \x05\x13\n\x0b\n\x04\x05\x02\x02\0\x12\x03\\\x04\x13\n\x0c\n\x05\x05\x02\ + \x02\0\x01\x12\x03\\\x04\x0b\n\x0c\n\x05\x05\x02\x02\0\x02\x12\x03\\\x11\ + \x12\n\x0b\n\x04\x05\x02\x02\x01\x12\x03]\x04\x13\n\x0c\n\x05\x05\x02\ + \x02\x01\x01\x12\x03]\x04\x0e\n\x0c\n\x05\x05\x02\x02\x01\x02\x12\x03]\ + \x11\x12\n\x0b\n\x04\x05\x02\x02\x02\x12\x03^\x04\x17\n\x0c\n\x05\x05\ + 
\x02\x02\x02\x01\x12\x03^\x04\x12\n\x0c\n\x05\x05\x02\x02\x02\x02\x12\ + \x03^\x15\x16\n\x0b\n\x04\x05\x02\x02\x03\x12\x03_\x04\x1e\n\x0c\n\x05\ + \x05\x02\x02\x03\x01\x12\x03_\x04\x19\n\x0c\n\x05\x05\x02\x02\x03\x02\ + \x12\x03_\x1c\x1d\n\x0b\n\x04\x05\x02\x02\x04\x12\x03`\x04!\n\x0c\n\x05\ + \x05\x02\x02\x04\x01\x12\x03`\x04\x1c\n\x0c\n\x05\x05\x02\x02\x04\x02\ + \x12\x03`\x1f\x20\n\n\n\x02\x04\x06\x12\x04c\0o\x01\n\n\n\x03\x04\x06\ + \x01\x12\x03c\x08\x12\n\x0b\n\x04\x04\x06\x02\0\x12\x03d\x04\x12\n\r\n\ + \x05\x04\x06\x02\0\x04\x12\x04d\x04c\x14\n\x0c\n\x05\x04\x06\x02\0\x05\ + \x12\x03d\x04\n\n\x0c\n\x05\x04\x06\x02\0\x01\x12\x03d\x0b\r\n\x0c\n\x05\ + \x04\x06\x02\0\x03\x12\x03d\x10\x11\n\x0b\n\x04\x04\x06\x02\x01\x12\x03e\ + \x04#\n\r\n\x05\x04\x06\x02\x01\x04\x12\x04e\x04d\x12\n\x0c\n\x05\x04\ + \x06\x02\x01\x06\x12\x03e\x04\x12\n\x0c\n\x05\x04\x06\x02\x01\x01\x12\ + \x03e\x13\x1e\n\x0c\n\x05\x04\x06\x02\x01\x03\x12\x03e!\"\nE\n\x04\x04\ + \x06\x02\x02\x12\x03g\x04\x17\x1a8\x20Used\x20in\x20`AddNode`,\x20`Remov\ + eNode`,\x20and\x20`AddLearnerNode`.\n\n\r\n\x05\x04\x06\x02\x02\x04\x12\ + \x04g\x04e#\n\x0c\n\x05\x04\x06\x02\x02\x05\x12\x03g\x04\n\n\x0c\n\x05\ + \x04\x06\x02\x02\x01\x12\x03g\x0b\x12\n\x0c\n\x05\x04\x06\x02\x02\x03\ + \x12\x03g\x15\x16\n\x0b\n\x04\x04\x06\x02\x03\x12\x03h\x04\x16\n\r\n\x05\ + \x04\x06\x02\x03\x04\x12\x04h\x04g\x17\n\x0c\n\x05\x04\x06\x02\x03\x05\ + \x12\x03h\x04\t\n\x0c\n\x05\x04\x06\x02\x03\x01\x12\x03h\n\x11\n\x0c\n\ + \x05\x04\x06\x02\x03\x03\x12\x03h\x14\x15\nN\n\x04\x04\x06\x02\x04\x12\ + \x03j\x04\x20\x1aA\x20Used\x20in\x20`BeginMembershipChange`\x20and\x20`F\ + inalizeMembershipChange`.\n\n\r\n\x05\x04\x06\x02\x04\x04\x12\x04j\x04h\ + \x16\n\x0c\n\x05\x04\x06\x02\x04\x06\x12\x03j\x04\r\n\x0c\n\x05\x04\x06\ + \x02\x04\x01\x12\x03j\x0e\x1b\n\x0c\n\x05\x04\x06\x02\x04\x03\x12\x03j\ + \x1e\x1f\n\xd0\x01\n\x04\x04\x06\x02\x05\x12\x03n\x04\x1b\x1a\xc2\x01\ + 
\x20Used\x20in\x20`BeginMembershipChange`\x20and\x20`FinalizeMembershipC\ + hange`.\n\x20Because\x20`RawNode::apply_conf_change`\x20takes\x20a\x20`C\ + onfChange`\x20instead\x20of\x20an\x20`Entry`\x20we\x20must\n\x20include\ + \x20this\x20index\x20so\x20it\x20can\x20be\x20known.\n\n\r\n\x05\x04\x06\ + \x02\x05\x04\x12\x04n\x04j\x20\n\x0c\n\x05\x04\x06\x02\x05\x05\x12\x03n\ + \x04\n\n\x0c\n\x05\x04\x06\x02\x05\x01\x12\x03n\x0b\x16\n\x0c\n\x05\x04\ + \x06\x02\x05\x03\x12\x03n\x19\x1ab\x06proto3\ "; static mut file_descriptor_proto_lazy: ::protobuf::lazy::Lazy<::protobuf::descriptor::FileDescriptorProto> = ::protobuf::lazy::Lazy { diff --git a/src/errors.rs b/src/errors.rs index 869367549..2e187872c 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -13,6 +13,7 @@ use std::error; use std::{cmp, io, result}; +use StateRole; use protobuf::ProtobufError; @@ -63,6 +64,18 @@ quick_error! { NotExists(id: u64, set: &'static str) { display("The node {} is not in the {} set.", id, set) } + /// The action given requires the node to be in a particular state role. + InvalidState(role: StateRole) { + display("Cannot complete that action while in {:?} role.", role) + } + /// The node attempted to transition to a new membership configuration while there was none pending. + NoPendingMembershipChange { + display("No pending membership change. Create a pending transition with `Raft::propose_membership_change` on the leader.") + } + /// An argument violates a calling contract. + ViolatesContract(contract: String) { + display("An argument violate a calling contract: {}", contract) + } } } diff --git a/src/lib.rs b/src/lib.rs index 9409815e2..f664b9974 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,10 @@ ## Creating a Raft node -You can use [`RawNode::new`](raw_node/struct.RawNode.html#method.new) to create the Raft node. 
To create the Raft node, you need to provide a [`Storage`](storage/trait.Storage.html) component, and a [`Config`](struct.Config.html) to the [`RawNode::new`](raw_node/struct.RawNode.html#method.new) function. +You can use [`RawNode::new`](raw_node/struct.RawNode.html#method.new) to create the Raft node. To +create the Raft node, you need to provide a [`Storage`](storage/trait.Storage.html) component, and +a [`Config`](struct.Config.html) to the [`RawNode::new`](raw_node/struct.RawNode.html#method.new) +function. ```rust use raft::{ @@ -58,7 +61,9 @@ node.raft.become_leader(); ## Ticking the Raft node -Use a timer to tick the Raft node at regular intervals. See the following example using Rust channel `recv_timeout` to drive the Raft node at least every 100ms, calling [`tick()`](raw_node/struct.RawNode.html#method.tick) each time. +Use a timer to tick the Raft node at regular intervals. See the following example using Rust +channel `recv_timeout` to drive the Raft node at least every 100ms, calling +[`tick()`](raw_node/struct.RawNode.html#method.tick) each time. ```rust # use raft::{Config, storage::MemStorage, raw_node::RawNode}; @@ -101,11 +106,18 @@ loop { ## Proposing to, and stepping the Raft node -Using the `propose` function you can drive the Raft node when the client sends a request to the Raft server. You can call `propose` to add the request to the Raft log explicitly. +Using the `propose` function you can drive the Raft node when the client sends a request to the +Raft server. You can call `propose` to add the request to the Raft log explicitly. -In most cases, the client needs to wait for a response for the request. For example, if the client writes a value to a key and wants to know whether the write succeeds or not, but the write flow is asynchronous in Raft, so the write log entry must be replicated to other followers, then committed and at last applied to the state machine, so here we need a way to notify the client after the write is finished. 
+In most cases, the client needs to wait for a response for the request. For example, if the +client writes a value to a key and wants to know whether the write succeeds or not, but the +write flow is asynchronous in Raft, so the write log entry must be replicated to other followers, +then committed and at last applied to the state machine, so here we need a way to notify the client +after the write is finished. -One simple way is to use a unique ID for the client request, and save the associated callback function in a hash map. When the log entry is applied, we can get the ID from the decoded entry, call the corresponding callback, and notify the client. +One simple way is to use a unique ID for the client request, and save the associated callback +function in a hash map. When the log entry is applied, we can get the ID from the decoded entry, +call the corresponding callback, and notify the client. You can call the `step` function when you receive the Raft messages from other nodes. @@ -165,11 +177,15 @@ loop { } ``` -In the above example, we use a channel to receive the `propose` and `step` messages. We only propose the request ID to the Raft log. In your own practice, you can embed the ID in your request and propose the encoded binary request data. +In the above example, we use a channel to receive the `propose` and `step` messages. We only +propose the request ID to the Raft log. In your own practice, you can embed the ID in your request +and propose the encoded binary request data. ## Processing the `Ready` State -When your Raft node is ticked and running, Raft should enter a `Ready` state. You need to first use `has_ready` to check whether Raft is ready. If yes, use the `ready` function to get a `Ready` state: +When your Raft node is ticked and running, Raft should enter a `Ready` state. You need to first use +`has_ready` to check whether Raft is ready. 
If yes, use the `ready` function to get a `Ready` +state: ```rust,ignore if !node.has_ready() { @@ -180,9 +196,11 @@ if !node.has_ready() { let mut ready = node.ready(); ``` -The `Ready` state contains quite a bit of information, and you need to check and process them one by one: +The `Ready` state contains quite a bit of information, and you need to check and process them one +by one: -1. Check whether `snapshot` is empty or not. If not empty, it means that the Raft node has received a Raft snapshot from the leader and we must apply the snapshot: +1. Check whether `snapshot` is empty or not. If not empty, it means that the Raft node has received +a Raft snapshot from the leader and we must apply the snapshot: ```rust,ignore if !raft::is_empty_snap(ready.snapshot()) { @@ -195,7 +213,8 @@ The `Ready` state contains quite a bit of information, and you need to check and ``` -2. Check whether `entries` is empty or not. If not empty, it means that there are newly added entries but has not been committed yet, we must append the entries to the Raft log: +2. Check whether `entries` is empty or not. If not empty, it means that there are newly added +entries but has not been committed yet, we must append the entries to the Raft log: ```rust,ignore if !ready.entries.is_empty() { @@ -205,7 +224,9 @@ The `Ready` state contains quite a bit of information, and you need to check and ``` -3. Check whether `hs` is empty or not. If not empty, it means that the `HardState` of the node has changed. For example, the node may vote for a new leader, or the commit index has been increased. We must persist the changed `HardState`: +3. Check whether `hs` is empty or not. If not empty, it means that the `HardState` of the node has +changed. For example, the node may vote for a new leader, or the commit index has been increased. 
+We must persist the changed `HardState`: ```rust,ignore if let Some(hs) = ready.hs() { @@ -214,7 +235,10 @@ The `Ready` state contains quite a bit of information, and you need to check and } ``` -4. Check whether `messages` is empty or not. If not, it means that the node will send messages to other nodes. There has been an optimization for sending messages: if the node is a leader, this can be done together with step 1 in parallel; if the node is not a leader, it needs to reply the messages to the leader after appending the Raft entries: +4. Check whether `messages` is empty or not. If not, it means that the node will send messages to +other nodes. There has been an optimization for sending messages: if the node is a leader, this can +be done together with step 1 in parallel; if the node is not a leader, it needs to reply the +messages to the leader after appending the Raft entries: ```rust,ignore if !is_leader { @@ -227,7 +251,9 @@ The `Ready` state contains quite a bit of information, and you need to check and } ``` -5. Check whether `committed_entires` is empty or not. If not, it means that there are some newly committed log entries which you must apply to the state machine. Of course, after applying, you need to update the applied index and resume `apply` later: +5. Check whether `committed_entires` is empty or not. If not, it means that there are some newly +committed log entries which you must apply to the state machine. Of course, after applying, you +need to update the applied index and resume `apply` later: ```rust,ignore if let Some(committed_entries) = ready.committed_entries.take() { @@ -258,6 +284,83 @@ The `Ready` state contains quite a bit of information, and you need to check and For more information, check out an [example](examples/single_mem_node/main.rs#L113-L179). +## Arbitrary Membership Changes + +> **Note:** This is an experimental feature. 
+ +When building a resilient, scalable distributed system there is a strong need to be able to change +the membership of a peer group *dynamically, without downtime.* This Raft crate supports this via +**Joint Consensus** +([Raft paper, section 6](https://web.stanford.edu/~ouster/cgi-bin/papers/raft-atc14)). + +It permits resilient arbitrary dynamic membership changes. A membership change can do any or all of +the following: + +* Add peer (learner or voter) *n* to the group. +* Remove peer *n* from the group. +* Remove a leader (unmanaged, via stepdown) +* Promote a learner to a voter. +* Replace a node *n* with another node *m*. + +It (currently) does not: + +* Allow control of the replacement leader during a stepdown. +* Optionally roll back a change during a peer group pause where the new peer group configuration +fails. +* Provide automated promotion of newly added voters from learner to voter when they are caught up. +This must be done as a two stage process for now. + +> PRs to enable these are welcome! We'd love to mentor/support you through implementing it. + +This means it's possible to do: + +```rust +use raft::{Config, storage::MemStorage, raw_node::RawNode, eraftpb::{Message, ConfChange}}; +let config = Config { id: 1, peers: vec![1, 2], ..Default::default() }; +let mut node = RawNode::new(&config, MemStorage::default(), vec![]).unwrap(); +node.raft.become_candidate(); +node.raft.become_leader(); + +// Call this on the leader, or send the command via a normal `MsgPropose`. +node.raft.propose_membership_change(( + // Any IntoIterator. + // Voters + vec![1,3], // Remove 2, add 3. + // Learners + vec![4,5,6], // Add 4, 5, 6. +)).unwrap(); + +# let entry = &node.raft.raft_log.entries(2, 1).unwrap()[0]; +// ...Later when the begin entry is recieved from a `ready()` in the `entries` field... 
+let conf_change = protobuf::parse_from_bytes::(entry.get_data()) + .unwrap(); +node.raft.begin_membership_change(&conf_change).unwrap(); +assert!(node.raft.is_in_membership_change()); +assert!(node.raft.prs().voter_ids().contains(&2)); +assert!(node.raft.prs().voter_ids().contains(&3)); +# +# // We hide this since the user isn't really encouraged to blindly call this, but we'd like a short +# // example. +# node.raft.raft_log.commit_to(2); +# node.raft.commit_apply(2); +# +# let entry = &node.raft.raft_log.entries(3, 1).unwrap()[0]; +// ...Later, when the finalize entry is recieved from a `ready()` in the `entries` field... +let conf_change = protobuf::parse_from_bytes::(entry.get_data()) + .unwrap(); +node.raft.finalize_membership_change(&conf_change).unwrap(); +assert!(!node.raft.prs().voter_ids().contains(&2)); +assert!(node.raft.prs().voter_ids().contains(&3)); +assert!(!node.raft.is_in_membership_change()); +``` + +This process is a two-phase process, during the midst of it the peer group's leader is managing +**two independent, possibly overlapping peer sets**. + +> **Note:** In order to maintain resiliency guarantees (progress while a majority of both peer sets is +active), it is very important to wait until the entire peer group has exited the transition phase +before taking old, removed peers offline. + */ #![deny(clippy::all)] @@ -275,6 +378,8 @@ extern crate quick_error; #[cfg(test)] extern crate env_logger; extern crate rand; +#[macro_use] +extern crate getset; mod config; /// This module supplies the needed message types. 
However, it is autogenerated and thus cannot be @@ -297,7 +402,7 @@ pub mod util; pub use self::config::Config; pub use self::errors::{Error, Result, StorageError}; pub use self::log_unstable::Unstable; -pub use self::progress::{Inflights, Progress, ProgressSet, ProgressState}; +pub use self::progress::{Configuration, Inflights, Progress, ProgressSet, ProgressState}; pub use self::raft::{vote_resp_msg_type, Raft, SoftState, StateRole, INVALID_ID, INVALID_INDEX}; pub use self::raft_log::{RaftLog, NO_LIMIT}; pub use self::raw_node::{is_empty_snap, Peer, RawNode, Ready, SnapshotStatus}; diff --git a/src/progress.rs b/src/progress.rs index 16cd8259a..b12beabaf 100644 --- a/src/progress.rs +++ b/src/progress.rs @@ -25,7 +25,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -use errors::Error; +use eraftpb::{ConfState, SnapshotMetadata}; +use errors::{Error, Result}; use hashbrown::hash_map::DefaultHashBuilder; use hashbrown::{HashMap, HashSet}; use std::cell::RefCell; @@ -54,12 +55,97 @@ impl Default for ProgressState { } } -#[derive(Clone, Debug, Default)] -struct Configuration { +/// A Raft internal representation of a Configuration. +/// +/// This is corollary to a ConfState, but optimized for `contains` calls. +#[derive(Clone, Debug, Default, PartialEq, Getters)] +pub struct Configuration { + /// The voter set. + #[get = "pub"] voters: HashSet, + /// The learner set. + #[get = "pub"] learners: HashSet, } +impl Configuration { + /// Create a new configuration with the given configuration. 
+ pub fn new( + voters: impl IntoIterator, + learners: impl IntoIterator, + ) -> Self { + Self { + voters: voters.into_iter().collect(), + learners: learners.into_iter().collect(), + } + } +} + +impl From<(Iter1, Iter2)> for Configuration +where + Iter1: IntoIterator, + Iter2: IntoIterator, +{ + fn from((voters, learners): (Iter1, Iter2)) -> Self { + Self { + voters: voters.into_iter().collect(), + learners: learners.into_iter().collect(), + } + } +} + +impl From for Configuration { + fn from(conf_state: ConfState) -> Self { + Self { + voters: conf_state.get_nodes().iter().cloned().collect(), + learners: conf_state.get_learners().iter().cloned().collect(), + } + } +} + +impl From for ConfState { + fn from(conf: Configuration) -> Self { + let mut state = ConfState::default(); + state.set_nodes(conf.voters.iter().cloned().collect()); + state.set_learners(conf.learners.iter().cloned().collect()); + state + } +} + +impl Configuration { + fn with_capacity(voters: usize, learners: usize) -> Self { + Self { + voters: HashSet::with_capacity_and_hasher(voters, DefaultHashBuilder::default()), + learners: HashSet::with_capacity_and_hasher(learners, DefaultHashBuilder::default()), + } + } + + /// Validates that the configuration is not problematic. + /// + /// Namely: + /// * There can be no overlap of voters and learners. + /// * There must be at least one voter. + pub fn valid(&self) -> Result<()> { + if let Some(id) = self.voters.intersection(&self.learners).next() { + Err(Error::Exists(*id, "learners"))?; + } else if self.voters.is_empty() { + Err(Error::ConfigInvalid( + "There must be at least one voter.".into(), + ))?; + } + Ok(()) + } + + fn has_quorum(&self, potential_quorum: &HashSet) -> bool { + self.voters.intersection(potential_quorum).count() >= majority(self.voters.len()) + } + + /// Returns whether or not the given `id` is a member of this configuration. 
+ pub fn contains(&self, id: u64) -> bool { + self.voters.contains(&id) || self.learners.contains(&id) + } +} + /// The status of an election according to a Candidate node. /// /// This is returned by `progress_set.election_status(vote_map)` @@ -75,10 +161,16 @@ pub enum CandidacyStatus { /// `ProgressSet` contains several `Progress`es, /// which could be `Leader`, `Follower` and `Learner`. -#[derive(Default, Clone)] +#[derive(Default, Clone, Getters)] pub struct ProgressSet { progress: HashMap, + /// The current configuration state of the cluster. + #[get = "pub"] configuration: Configuration, + /// The pending configuration, which will be adopted after the Finalize entry is applied. + #[get = "pub"] + next_configuration: Option, + configuration_capacity: (usize, usize), // A preallocated buffer for sorting in the minimally_commited_index function. // You should not depend on these values unless you just set them. // We use a cell to avoid taking a `&mut self`. @@ -88,32 +180,65 @@ pub struct ProgressSet { impl ProgressSet { /// Creates a new ProgressSet. pub fn new() -> Self { - ProgressSet { - progress: Default::default(), - configuration: Default::default(), - sort_buffer: Default::default(), - } + Self::with_capacity(0, 0) } - /// Create a progress sete with the specified sizes already reserved. + /// Create a progress set with the specified sizes already reserved. 
pub fn with_capacity(voters: usize, learners: usize) -> Self { ProgressSet { progress: HashMap::with_capacity_and_hasher( voters + learners, DefaultHashBuilder::default(), ), - configuration: Configuration { - voters: HashSet::with_capacity_and_hasher(voters, DefaultHashBuilder::default()), - learners: HashSet::with_capacity_and_hasher( - learners, - DefaultHashBuilder::default(), - ), - }, - sort_buffer: Default::default(), + sort_buffer: RefCell::from(Vec::with_capacity(voters)), + configuration_capacity: (voters, learners), + configuration: Configuration::with_capacity(voters, learners), + next_configuration: Option::default(), + } + } + + pub(crate) fn restore_snapmeta( + meta: &SnapshotMetadata, + next_idx: u64, + max_inflight: usize, + ) -> Self { + let mut prs = ProgressSet::new(); + let pr = Progress::new(next_idx, max_inflight); + meta.get_conf_state().get_nodes().iter().for_each(|id| { + prs.progress.insert(*id, pr.clone()); + prs.configuration.voters.insert(*id); + }); + meta.get_conf_state().get_learners().iter().for_each(|id| { + prs.progress.insert(*id, pr.clone()); + prs.configuration.learners.insert(*id); + }); + + if meta.pending_membership_change_index != 0 { + let mut next_configuration = Configuration::with_capacity(0, 0); + meta.get_pending_membership_change() + .get_nodes() + .iter() + .for_each(|id| { + prs.progress.insert(*id, pr.clone()); + next_configuration.voters.insert(*id); + }); + meta.get_pending_membership_change() + .get_learners() + .iter() + .for_each(|id| { + prs.progress.insert(*id, pr.clone()); + next_configuration.learners.insert(*id); + }); + prs.next_configuration = Some(next_configuration); } + prs.assert_progress_and_configuration_consistent(); + prs } /// Returns the status of voters. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. 
#[inline] pub fn voters(&self) -> impl Iterator { let set = self.voter_ids(); @@ -121,6 +246,9 @@ impl ProgressSet { } /// Returns the status of learners. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] pub fn learners(&self) -> impl Iterator { let set = self.learner_ids(); @@ -128,33 +256,61 @@ impl ProgressSet { } /// Returns the mutable status of voters. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] pub fn voters_mut(&mut self) -> impl Iterator { - let ids = &self.configuration.voters; + let ids = self.voter_ids(); self.progress .iter_mut() .filter(move |(k, _)| ids.contains(k)) } /// Returns the mutable status of learners. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] pub fn learners_mut(&mut self) -> impl Iterator { - let ids = &self.configuration.learners; + let ids = self.learner_ids(); self.progress .iter_mut() .filter(move |(k, _)| ids.contains(k)) } /// Returns the ids of all known voters. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] - pub fn voter_ids(&self) -> &HashSet { - &self.configuration.voters + pub fn voter_ids(&self) -> HashSet { + match self.next_configuration { + Some(ref next) => self + .configuration + .voters + .union(&next.voters) + .cloned() + .collect::>(), + None => self.configuration.voters.clone(), + } } /// Returns the ids of all known learners. + /// + /// **Note:** Do not use this for majority/quorum calculation. 
The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] - pub fn learner_ids(&self) -> &HashSet { - &self.configuration.learners + pub fn learner_ids(&self) -> HashSet { + match self.next_configuration { + Some(ref next) => self + .configuration + .learners + .union(&next.learners) + .cloned() + .collect::>(), + None => self.configuration.learners.clone(), + } } /// Grabs a reference to the progress of a node. @@ -170,60 +326,109 @@ impl ProgressSet { } /// Returns an iterator across all the nodes and their progress. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] pub fn iter(&self) -> impl ExactSizeIterator { self.progress.iter() } /// Returns a mutable iterator across all the nodes and their progress. + /// + /// **Note:** Do not use this for majority/quorum calculation. The Raft node may be + /// transitioning to a new configuration and have two qourums. Use `has_quorum` instead. #[inline] pub fn iter_mut(&mut self) -> impl ExactSizeIterator { self.progress.iter_mut() } - /// Adds a voter node - pub fn insert_voter(&mut self, id: u64, pr: Progress) -> Result<(), Error> { - // If the progress exists already this is in error. - if self.progress.contains_key(&id) { - // Determine the correct error to return. - if self.learner_ids().contains(&id) { - return Err(Error::Exists(id, "learners")); - } + /// Adds a voter to the group. + /// + /// # Errors + /// + /// * `id` is in the voter set. + /// * `id` is in the learner set. + /// * There is a pending membership change. 
+ pub fn insert_voter(&mut self, id: u64, pr: Progress) -> Result<()> { + debug!("Inserting voter with id {}.", id); + + if self.learner_ids().contains(&id) { + return Err(Error::Exists(id, "learners")); + } else if self.voter_ids().contains(&id) { return Err(Error::Exists(id, "voters")); + } else if self.is_in_membership_change() { + return Err(Error::ViolatesContract( + "There is a pending membership change.".into(), + )); } + self.configuration.voters.insert(id); self.progress.insert(id, pr); + self.assert_progress_and_configuration_consistent(); Ok(()) } - /// Adds a learner to the cluster - pub fn insert_learner(&mut self, id: u64, pr: Progress) -> Result<(), Error> { - // If the progress exists already this is in error. - if self.progress.contains_key(&id) { - // Determine the correct error to return. - if self.learner_ids().contains(&id) { - return Err(Error::Exists(id, "learners")); - } + /// Adds a learner to the group. + /// + /// # Errors + /// + /// * `id` is in the voter set. + /// * `id` is in the learner set. + /// * There is a pending membership change. + pub fn insert_learner(&mut self, id: u64, pr: Progress) -> Result<()> { + debug!("Inserting learner with id {}.", id); + + if self.learner_ids().contains(&id) { + return Err(Error::Exists(id, "learners")); + } else if self.voter_ids().contains(&id) { return Err(Error::Exists(id, "voters")); + } else if self.is_in_membership_change() { + return Err(Error::ViolatesContract( + "There is a pending membership change".into(), + )); } + self.configuration.learners.insert(id); self.progress.insert(id, pr); + self.assert_progress_and_configuration_consistent(); Ok(()) } /// Removes the peer from the set of voters or learners. - pub fn remove(&mut self, id: u64) -> Option { - self.configuration.voters.remove(&id); + /// + /// # Errors + /// + /// * There is a pending membership change. 
+ pub fn remove(&mut self, id: u64) -> Result> { + debug!("Removing peer with id {}.", id); + + if self.is_in_membership_change() { + return Err(Error::ViolatesContract( + "There is a pending membership change.".into(), + )); + } + self.configuration.learners.remove(&id); + self.configuration.voters.remove(&id); let removed = self.progress.remove(&id); + self.assert_progress_and_configuration_consistent(); - removed + Ok(removed) } /// Promote a learner to a peer. - pub fn promote_learner(&mut self, id: u64) -> Result<(), Error> { + pub fn promote_learner(&mut self, id: u64) -> Result<()> { + debug!("Promote learner with id {}.", id); + + if self.is_in_membership_change() { + return Err(Error::ViolatesContract( + "There is a pending membership change.".into(), + )); + } + if !self.configuration.learners.remove(&id) { // Wasn't already a learner. We can't promote what doesn't exist. return Err(Error::NotExists(id, "learners")); @@ -232,6 +437,7 @@ impl ProgressSet { // Already existed, the caller should know this was a noop. 
return Err(Error::Exists(id, "voters")); } + self.assert_progress_and_configuration_consistent(); Ok(()) } @@ -247,9 +453,17 @@ impl ProgressSet { .progress .keys() .all(|v| self.configuration.learners.contains(v) - || self.configuration.voters.contains(v))); + || self.configuration.voters.contains(v) + || self + .next_configuration + .as_ref() + .map_or(false, |c| c.learners.contains(v)) + || self + .next_configuration + .as_ref() + .map_or(false, |c| c.voters.contains(v)))); assert_eq!( - self.configuration.voters.len() + self.configuration.learners.len(), + self.voter_ids().len() + self.learner_ids().len(), self.progress.len() ); } @@ -260,13 +474,28 @@ impl ProgressSet { pub fn maximal_committed_index(&self) -> u64 { let mut matched = self.sort_buffer.borrow_mut(); matched.clear(); - self.voters().for_each(|(_id, peer)| { + self.configuration.voters().iter().for_each(|id| { + let peer = &self.progress[id]; matched.push(peer.matched); }); // Reverse sort. matched.sort_by(|a, b| b.cmp(a)); - // Smallest that the majority has commited. - matched[matched.len() / 2] + let mut mci = matched[matched.len() / 2]; + + if let Some(next) = &self.next_configuration { + matched.clear(); + next.voters().iter().for_each(|id| { + let peer = &self.progress[id]; + matched.push(peer.matched); + }); + // Reverse sort. + matched.sort_by(|a, b| b.cmp(a)); + let next_mci = matched[matched.len() / 2]; + if next_mci < mci { + mci = next_mci; + } + } + mci } /// Returns the Candidate's eligibility in the current election. @@ -276,34 +505,38 @@ impl ProgressSet { /// or `Ineligible`, meaning the election can be concluded. 
pub fn candidacy_status<'a>( &self, - id: u64, votes: impl IntoIterator, ) -> CandidacyStatus { - let (accepted, total) = - votes - .into_iter() - .fold((0, 0), |(mut accepted, mut total), (_, nominated)| { - if *nominated { - accepted += 1; - } - total += 1; - (accepted, total) - }); - let quorum = majority(self.voter_ids().len()); - let rejected = total - accepted; - - info!( - "{} [quorum: {}] has received {} votes and {} vote rejections", - id, quorum, accepted, rejected, + let (accepts, rejects) = votes.into_iter().fold( + (HashSet::default(), HashSet::default()), + |(mut accepts, mut rejects), (&id, &accepted)| { + if accepted { + accepts.insert(id); + } else { + rejects.insert(id); + } + (accepts, rejects) + }, ); - if accepted >= quorum { - CandidacyStatus::Elected - } else if rejected == quorum { - CandidacyStatus::Ineligible - } else { - CandidacyStatus::Eligible - } + match self.next_configuration { + Some(ref next) => { + if next.has_quorum(&accepts) && self.configuration.has_quorum(&accepts) { + return CandidacyStatus::Elected; + } else if next.has_quorum(&rejects) || self.configuration.has_quorum(&rejects) { + return CandidacyStatus::Ineligible; + } + } + None => { + if self.configuration.has_quorum(&accepts) { + return CandidacyStatus::Elected; + } else if self.configuration.has_quorum(&rejects) { + return CandidacyStatus::Ineligible; + } + } + }; + + CandidacyStatus::Eligible } /// Determines if the current quorum is active according to the this raft node. @@ -311,26 +544,129 @@ impl ProgressSet { /// /// This should only be called by the leader. 
pub fn quorum_recently_active(&mut self, perspective_of: u64) -> bool { - let mut active = 0; + let mut active = HashSet::default(); for (&id, pr) in self.voters_mut() { if id == perspective_of { - active += 1; + active.insert(id); continue; } if pr.recent_active { - active += 1; + active.insert(id); } pr.recent_active = false; } for (&_id, pr) in self.learners_mut() { pr.recent_active = false; } - active >= majority(self.voter_ids().len()) + self.configuration.has_quorum(&active) && + // If `next` is `None` we don't consider it, so just `true` it. + self.next_configuration.as_ref().map(|next| next.has_quorum(&active)).unwrap_or(true) } /// Determine if a quorum is formed from the given set of nodes. + /// + /// This is the only correct way to verify you have reached a quorum for the whole group. + #[inline] pub fn has_quorum(&self, potential_quorum: &HashSet) -> bool { - potential_quorum.len() >= majority(self.voter_ids().len()) + self.configuration.has_quorum(potential_quorum) + && self + .next_configuration + .as_ref() + .map(|next| next.has_quorum(potential_quorum)) + // If `next` is `None` we don't consider it, so just `true` it. + .unwrap_or(true) + } + + /// Determine if the ProgressSet is represented by a transition state under Joint Consensus. + #[inline] + pub fn is_in_membership_change(&self) -> bool { + self.next_configuration.is_some() + } + + /// Enter a joint consensus state to transition to the specified configuration. + /// + /// The `next` provided should be derived from the `ConfChange` message. `progress` is used as + /// a basis for created peer `Progress` values. You are only expected to set `ins` from the + /// `raft.max_inflights` value. + /// + /// Once this state is entered the leader should replicate the `ConfChange` message. After the + /// majority of nodes, in both the current and the `next`, have committed the union state. 
At + /// this point the leader can call `finalize_config_transition` and replicate a message + /// commiting the change. + /// + /// Valid transitions: + /// * Non-existing -> Learner + /// * Non-existing -> Voter + /// * Learner -> Voter + /// * Learner -> Non-existing + /// * Voter -> Non-existing + /// + /// Errors: + /// * Voter -> Learner + /// * Member as voter and learner. + /// * Empty voter set. + pub(crate) fn begin_membership_change( + &mut self, + next: impl Into, + mut progress: Progress, + ) -> Result<()> { + let next = next.into(); + next.valid()?; + // Demotion check. + if let Some(&demoted) = self + .configuration + .voters + .intersection(&next.learners) + .next() + { + return Err(Error::Exists(demoted, "learners")); + } + debug!( + "Beginning membership change. End configuration will be {:?}", + next + ); + + // When a peer is first added/promoted, we should mark it as recently active. + // Otherwise, check_quorum may cause us to step down if it is invoked + // before the added peer has a chance to communicate with us. + progress.recent_active = true; + progress.paused = false; + for id in next.voters.iter().chain(&next.learners) { + // Now we create progresses for any that do not exist. + self.progress.entry(*id).or_insert_with(|| progress.clone()); + } + self.next_configuration = Some(next); + Ok(()) + } + + /// Finalizes the joint consensus state and transitions solely to the new state. + /// + /// This must be called only after calling `begin_membership_change` and after the majority + /// of peers in both the `current` and the `next` state have commited the changes. 
+ pub fn finalize_membership_change(&mut self) -> Result<()> { + let next = self.next_configuration.take(); + match next { + None => Err(Error::NoPendingMembershipChange)?, + Some(next) => { + { + let pending = self + .configuration + .voters() + .difference(next.voters()) + .chain(self.configuration.learners().difference(next.learners())) + .cloned(); + for id in pending { + self.progress.remove(&id); + } + } + self.configuration = next; + debug!( + "Finalizing membership change. Config is {:?}", + self.configuration + ); + } + } + Ok(()) } } @@ -515,7 +851,7 @@ impl Progress { } /// A buffer of inflight messages. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)] pub struct Inflights { // the starting index in the buffer start: usize, @@ -526,6 +862,19 @@ pub struct Inflights { buffer: Vec, } +// The `buffer` must have it's capacity set correctly on clone, normally it does not. +impl Clone for Inflights { + fn clone(&self) -> Self { + let mut buffer = self.buffer.clone(); + buffer.reserve(self.buffer.capacity() - self.buffer.len()); + Inflights { + start: self.start, + count: self.count, + buffer, + } + } +} + impl Inflights { /// Creates a new buffer for inflight messages. 
pub fn new(cap: usize) -> Inflights { @@ -746,8 +1095,9 @@ mod test { // See https://github.com/pingcap/raft-rs/issues/125 #[cfg(test)] mod test_progress_set { - use Result; - use {Progress, ProgressSet}; + use hashbrown::HashSet; + + use {progress::Configuration, Progress, ProgressSet, Result}; const CANARY: u64 = 123; @@ -844,4 +1194,101 @@ mod test_progress_set { assert_eq!(pre, *set.get(1).expect("Peer should not have been deleted")); Ok(()) } + + #[test] + fn test_membership_change_configuration_remove_voter() -> Result<()> { + check_membership_change_configuration((vec![1, 2], vec![]), (vec![1], vec![])) + } + + #[test] + fn test_membership_change_configuration_remove_learner() -> Result<()> { + check_membership_change_configuration((vec![1], vec![2]), (vec![1], vec![])) + } + + #[test] + fn test_membership_change_configuration_conflicting_sets() { + assert!( + check_membership_change_configuration((vec![1], vec![]), (vec![1], vec![1]),).is_err() + ) + } + + #[test] + fn test_membership_change_configuration_empty_sets() { + assert!(check_membership_change_configuration((vec![], vec![]), (vec![], vec![])).is_err()) + } + + #[test] + fn test_membership_change_configuration_empty_voters() { + assert!( + check_membership_change_configuration((vec![1], vec![]), (vec![], vec![]),).is_err() + ) + } + + #[test] + fn test_membership_change_configuration_add_voter() -> Result<()> { + check_membership_change_configuration((vec![1], vec![]), (vec![1, 2], vec![])) + } + + #[test] + fn test_membership_change_configuration_add_learner() -> Result<()> { + check_membership_change_configuration((vec![1], vec![]), (vec![1], vec![2])) + } + + #[test] + fn test_membership_change_configuration_promote_learner() -> Result<()> { + check_membership_change_configuration((vec![1], vec![2]), (vec![1, 2], vec![])) + } + + fn check_membership_change_configuration( + start: (impl IntoIterator, impl IntoIterator), + end: (impl IntoIterator, impl IntoIterator), + ) -> Result<()> { + let 
start_voters = start.0.into_iter().collect::>(); + let start_learners = start.1.into_iter().collect::>(); + let end_voters = end.0.into_iter().collect::>(); + let end_learners = end.1.into_iter().collect::>(); + let transition_voters = start_voters + .union(&end_voters) + .cloned() + .collect::>(); + let transition_learners = start_learners + .union(&end_learners) + .cloned() + .collect::>(); + + let mut set = ProgressSet::default(); + let default_progress = Progress::new(0, 10); + + for starter in start_voters { + set.insert_voter(starter, default_progress.clone())?; + } + for starter in start_learners { + set.insert_learner(starter, default_progress.clone())?; + } + set.begin_membership_change( + Configuration::new(end_voters.clone(), end_learners.clone()), + default_progress, + )?; + assert!(set.is_in_membership_change()); + assert_eq!( + set.voter_ids(), + transition_voters, + "Transition state voters inaccurate" + ); + assert_eq!( + set.learner_ids(), + transition_learners, + "Transition state learners inaccurate." 
+ ); + + set.finalize_membership_change()?; + assert!(!set.is_in_membership_change()); + assert_eq!(set.voter_ids(), end_voters, "End state voters inaccurate"); + assert_eq!( + set.learner_ids(), + end_learners, + "End state learners inaccurate" + ); + Ok(()) + } } diff --git a/src/raft.rs b/src/raft.rs index 7079dfd63..3896c4111 100644 --- a/src/raft.rs +++ b/src/raft.rs @@ -27,13 +27,16 @@ use std::cmp; -use eraftpb::{Entry, EntryType, HardState, Message, MessageType, Snapshot}; +use eraftpb::{ + ConfChange, ConfChangeType, Entry, EntryType, HardState, Message, MessageType, Snapshot, +}; use hashbrown::{HashMap, HashSet}; +use protobuf; use protobuf::RepeatedField; use rand::{self, Rng}; use super::errors::{Error, Result, StorageError}; -use super::progress::{CandidacyStatus, Progress, ProgressSet, ProgressState}; +use super::progress::{CandidacyStatus, Configuration, Progress, ProgressSet, ProgressState}; use super::raft_log::{self, RaftLog}; use super::read_only::{ReadOnly, ReadOnlyOption, ReadState}; use super::storage::Storage; @@ -84,7 +87,7 @@ pub struct SoftState { /// A struct that represents the raft consensus itself. Stores details concerning the current /// and possible state the system can take. -#[derive(Default)] +#[derive(Default, Getters)] pub struct Raft { /// The current election term. pub term: u64, @@ -135,13 +138,34 @@ pub struct Raft { pub lead_transferee: Option, /// Only one conf change may be pending (in the log, but not yet - /// applied) at a time. This is enforced via pending_conf_index, which + /// applied) at a time. This is enforced via `pending_conf_index`, which /// is set to a value >= the log index of the latest pending /// configuration change (if any). Config changes are only allowed to /// be proposed if the leader's applied index is greater than this /// value. + /// + /// This value is conservatively set in cases where there may be a configuration change pending, + /// but scanning the log is possibly expensive. 
This implies that the index stated here may not + /// necessarily be a config change entry, and it may not be a `BeginMembershipChange` entry, even if + /// we set this to one. pub pending_conf_index: u64, + /// The last `BeginMembershipChange` entry. Once we make this change we exit the joint state. + /// + /// This is different than `pending_conf_index` since it is more specific, and also exact. + /// While `pending_conf_index` is conservatively set at times to ensure safety in the + /// one-by-one change method, in joint consensus based changes we track the state exactly. The + /// index here **must** only be set when a `BeginMembershipChange` is present at that index. + /// + /// # Caveats + /// + /// It is important that whenever this is set that `pending_conf_index` is also set to the + /// value if it is greater than the existing value. + /// + /// **Use `Raft::set_pending_membership_change()` to change this value.** + #[get = "pub"] + pending_membership_change: Option, + /// The queue of read-only requests. 
pub read_only: ReadOnly, @@ -208,8 +232,8 @@ impl Raft { #[allow(clippy::new_ret_no_self)] pub fn new(c: &Config, store: T) -> Result> { c.validate()?; - let rs = store.initial_state()?; - let conf_state = &rs.conf_state; + let raft_state = store.initial_state()?; + let conf_state = &raft_state.conf_state; let raft_log = RaftLog::new(store, c.tag.clone()); let mut peers: &[u64] = &c.peers; let mut learners: &[u64] = &c.learners; @@ -247,6 +271,7 @@ impl Raft { term: Default::default(), election_elapsed: Default::default(), pending_conf_index: Default::default(), + pending_membership_change: Default::default(), vote: Default::default(), heartbeat_elapsed: Default::default(), randomized_election_timeout: 0, @@ -271,24 +296,37 @@ impl Raft { } } - if rs.hard_state != HardState::new() { - r.load_state(&rs.hard_state); + if raft_state.hard_state != HardState::new() { + r.load_state(&raft_state.hard_state); } if c.applied > 0 { - r.raft_log.applied_to(c.applied); + r.commit_apply(c.applied); } let term = r.term; r.become_follower(term, INVALID_ID); + + // Used to resume Joint Consensus Changes + let pending_conf_state = raft_state.pending_conf_state(); + let pending_conf_state_start_index = raft_state.pending_conf_state_start_index(); + match (pending_conf_state, pending_conf_state_start_index) { + (Some(state), Some(idx)) => { + r.begin_membership_change(&ConfChange::from((*idx, state.clone())))?; + } + (None, None) => (), + _ => unreachable!("Should never find pending_conf_change without an index."), + }; + info!( "{} newRaft [peers: {:?}, term: {:?}, commit: {}, applied: {}, last_index: {}, \ - last_term: {}]", + last_term: {}, pending_membership_change: {:?}]", r.tag, r.prs().voters().collect::>(), r.term, r.raft_log.committed, r.raft_log.get_applied(), r.raft_log.last_index(), - r.raft_log.last_term() + r.raft_log.last_term(), + r.pending_membership_change(), ); Ok(r) } @@ -362,6 +400,11 @@ impl Raft { self.heartbeat_timeout } + /// Fetch the number of ticks elapsed 
since last heartbeat. + pub fn get_heartbeat_elapsed(&self) -> usize { + self.heartbeat_elapsed + } + /// Return the length of the current randomized election timeout. pub fn get_randomized_election_timeout(&self) -> usize { self.randomized_election_timeout @@ -373,8 +416,35 @@ impl Raft { self.skip_bcast_commit = skip; } - // send persists state to stable storage and then sends to its mailbox. + /// Set when the peer began a joint consensus change. + /// + /// This will also set `pending_conf_index` if it is larger than the existing number. + #[inline] + fn set_pending_membership_change(&mut self, maybe_change: impl Into>) { + let maybe_change = maybe_change.into(); + if let Some(ref change) = maybe_change { + let index = change.get_start_index(); + assert!(self.pending_membership_change.is_none() || index == self.pending_conf_index); + if index > self.pending_conf_index { + self.pending_conf_index = index; + } + } + self.pending_membership_change = maybe_change.clone(); + } + + /// Get the index which the pending membership change started at. + /// + /// > **Note:** This is an experimental feature. + #[inline] + pub fn began_membership_change_at(&self) -> Option { + self.pending_membership_change + .as_ref() + .map(|v| v.get_start_index()) + } + + /// send persists state to stable storage and then sends to its mailbox. fn send(&mut self, mut m: Message) { + debug!("Sending from {} to {}: {:?}", self.id, m.get_to(), m); m.set_from(self.id); if m.get_msg_type() == MessageType::MsgRequestVote || m.get_msg_type() == MessageType::MsgRequestPreVote @@ -503,6 +573,11 @@ impl Raft { /// Sends RPC, with entries to the given peer. pub fn send_append(&mut self, to: u64, pr: &mut Progress) { if pr.is_paused() { + trace!( + "Skipping sending to {}, it's paused. 
Progress: {:?}", + to, + pr + ); return; } let term = self.raft_log.term(pr.next_idx - 1); @@ -511,6 +586,13 @@ impl Raft { m.set_to(to); if term.is_err() || ents.is_err() { // send snapshot if we failed to get term or entries + trace!( + "{} Skipping sending to {}, term: {:?}, ents: {:?}", + self.tag, + to, + term, + ents, + ); if !self.prepare_send_snapshot(&mut m, pr, to) { return; } @@ -573,6 +655,45 @@ impl Raft { self.raft_log.maybe_commit(mci, self.term) } + /// Commit that the Raft peer has applied up to the given index. + /// + /// Registers the new applied index to the Raft log. + /// + /// # Hooks + /// + /// * Post: Checks to see if it's time to finalize a Joint Consensus state. + pub fn commit_apply(&mut self, applied: u64) { + #[allow(deprecated)] + self.raft_log.applied_to(applied); + + // Check to see if we need to finalize a Joint Consensus state now. + let start_index = self + .pending_membership_change + .as_ref() + .map(|v| Some(v.get_start_index())) + .unwrap_or(None); + + if let Some(index) = start_index { + // Invariant: We know that if we have commited past some index, we can also commit that index. + if applied >= index && self.state == StateRole::Leader { + // We must replicate the commit entry. + self.append_finalize_conf_change_entry(); + } + } + } + + fn append_finalize_conf_change_entry(&mut self) { + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::FinalizeMembershipChange); + let data = protobuf::Message::write_to_bytes(&conf_change).unwrap(); + let mut entry = Entry::new(); + entry.set_entry_type(EntryType::EntryConfChange); + entry.set_data(data); + // Index/Term set here. + self.append_entry(&mut [entry]); + self.bcast_append(); + } + /// Resets the current node to a given term. pub fn reset(&mut self, term: u64) { if self.term != term { @@ -734,6 +855,7 @@ impl Raft { /// /// Panics if this is a follower node. 
pub fn become_leader(&mut self) { + trace!("ENTER become_leader"); assert_ne!( self.state, StateRole::Follower, @@ -759,7 +881,30 @@ impl Raft { self.pending_conf_index = self.raft_log.last_index(); self.append_entry(&mut [Entry::new()]); + + // In most cases, we append only a new entry marked with an index and term. + // In the specific case of a node recovering while in the middle of a membership change, + // and the finalization entry may have been lost, we must also append that, since it + // would be overwritten by the term change. + let change_start_index = self + .pending_membership_change + .as_ref() + .map(|v| Some(v.get_start_index())) + .unwrap_or(None); + if let Some(index) = change_start_index { + trace!( + "Checking if we need to finalize again..., began: {}, applied: {}, committed: {}", + index, + self.raft_log.applied, + self.raft_log.committed + ); + if index <= self.raft_log.committed { + self.append_finalize_conf_change_entry(); + } + } + info!("{} became leader at term {}", self.tag, self.term); + trace!("EXIT become_leader"); } fn num_pending_conf(&self, ents: &[Entry]) -> usize { @@ -787,7 +932,7 @@ impl Raft { self.id, vote_msg, self_id, self.term ); self.register_vote(self_id, acceptance); - if let CandidacyStatus::Elected = self.prs().candidacy_status(self.id, &self.votes) { + if let CandidacyStatus::Elected = self.prs().candidacy_status(&self.votes) { // We won the election after voting for ourselves (which must mean that // this is a single-node cluster). Advance to the next state. if campaign_type == CAMPAIGN_PRE_ELECTION { @@ -849,6 +994,10 @@ impl Raft { // if a server receives RequestVote request within the minimum election // timeout of hearing from a current leader, it does not update its term // or grant its vote + // + // This is included in the 3rd concern for Joint Consensus, where if another + // peer is removed from the cluster it may try to hold elections and disrupt + // stability. 
info!( "{} [logterm: {}, index: {}, vote: {}] ignored {:?} vote from \ {} [logterm: {}, index: {}] at term {}: lease is not expired \ @@ -1043,7 +1192,105 @@ impl Raft { StateRole::Leader => self.step_leader(m)?, }, } + Ok(()) + } + + /// Apply a `BeginMembershipChange` variant `ConfChange`. + /// + /// > **Note:** This is an experimental feature. + /// + /// When a Raft node applies this variant of a configuration change it will adopt a joint + /// configuration state until the membership change is finalized. + /// + /// During this time the `Raft` will have two, possibly overlapping, cooperating quorums for + /// both elections and log replication. + /// + /// # Errors + /// + /// * `ConfChange.change_type` is not `BeginMembershipChange` + /// * `ConfChange.configuration` does not exist. + /// * `ConfChange.start_index` does not exist. It **must** equal the index of the + /// corresponding entry. + #[inline(always)] + pub fn begin_membership_change(&mut self, conf_change: &ConfChange) -> Result<()> { + if conf_change.get_change_type() != ConfChangeType::BeginMembershipChange { + return Err(Error::ViolatesContract(format!( + "{:?} != BeginMembershipChange", + conf_change.get_change_type() + ))); + } + let configuration = if conf_change.has_configuration() { + conf_change.get_configuration().clone() + } else { + return Err(Error::ViolatesContract( + "!ConfChange::has_configuration()".into(), + )); + }; + if conf_change.get_start_index() == 0 { + return Err(Error::ViolatesContract( + "!ConfChange::has_start_index()".into(), + )); + }; + + self.set_pending_membership_change(conf_change.clone()); + let max_inflights = self.max_inflight; + self.mut_prs() + .begin_membership_change(configuration, Progress::new(1, max_inflights))?; + Ok(()) + } + + /// Apply a `FinalizeMembershipChange` variant `ConfChange`. + /// + /// > **Note:** This is an experimental feature. 
+ /// + /// When a Raft node applies this variant of a configuration change it will finalize the + /// transition begun by [`begin_membership_change`]. + /// + /// Once this is called the Raft will no longer have two, possibly overlapping, cooperating + /// quorums. + /// + /// # Errors + /// + /// * This Raft is not in a configuration change via `begin_membership_change`. + /// * `ConfChange.change_type` is not a `FinalizeMembershipChange`. + /// * `ConfChange.configuration` value should not exist. + /// * `ConfChange.start_index` value should not exist. + #[inline(always)] + pub fn finalize_membership_change(&mut self, conf_change: &ConfChange) -> Result<()> { + if conf_change.get_change_type() != ConfChangeType::FinalizeMembershipChange { + return Err(Error::ViolatesContract(format!( + "{:?} != FinalizeMembershipChange", + conf_change.get_change_type() + ))); + } + if conf_change.has_configuration() { + return Err(Error::ViolatesContract( + "ConfChange::has_configuration()".into(), + )); + }; + let leader_in_new_set = self + .prs() + .next_configuration() + .as_ref() + .map(|config| config.contains(self.leader_id)) + .ok_or_else(|| Error::NoPendingMembershipChange)?; + + // Joint Consensus, in the Raft paper, states the leader should step down and become a + // follower if it is removed during a transition. + if !leader_in_new_set { + let last_term = self.raft_log.last_term(); + if self.state == StateRole::Leader { + self.become_follower(last_term, INVALID_ID); + } else { + // It's no longer safe to lookup the ID in the ProgressSet, remove it. + self.leader_id = INVALID_ID; + } + } + self.mut_prs().finalize_membership_change()?; + // Ensure we reset this on *any* node, since the leader might have failed + // and we don't want to finalize twice.
+ self.set_pending_membership_change(None); Ok(()) } @@ -1526,7 +1773,7 @@ impl Raft { self.term ); self.register_vote(from_id, acceptance); - match self.prs().candidacy_status(self.id, &self.votes) { + match self.prs().candidacy_status(&self.votes) { CandidacyStatus::Elected => { if self.state == StateRole::PreCandidate { self.campaign(CAMPAIGN_ELECTION); @@ -1652,6 +1899,7 @@ impl Raft { /// For a given message, append the entries to the log. pub fn handle_append_entries(&mut self, m: &Message) { if m.get_index() < self.raft_log.committed { + debug!("{} Got message with lower index than committed.", self.tag); let mut to_send = Message::new(); to_send.set_to(m.get_from()); to_send.set_msg_type(MessageType::MsgAppendResponse); @@ -1674,10 +1922,10 @@ impl Raft { } None => { debug!( - "{} [logterm: {}, index: {}] rejected msgApp [logterm: {}, index: {}] \ + "{} [logterm: {:?}, index: {}] rejected msgApp [logterm: {}, index: {}] \ from {}", self.tag, - self.raft_log.term(m.get_index()).unwrap_or(0), + self.raft_log.term(m.get_index()), m.get_index(), m.get_log_term(), m.get_index(), @@ -1709,8 +1957,8 @@ impl Raft { ); if self.restore(m.take_snapshot()) { info!( - "{} [commit: {}] restored snapshot [index: {}, term: {}]", - self.tag, self.raft_log.committed, sindex, sterm + "{} [commit: {}, term: {}] restored snapshot [index: {}, term: {}]", + self.tag, self.raft_log.committed, self.term, sindex, sterm ); let mut to_send = Message::new(); to_send.set_to(m.get_from()); @@ -1773,26 +2021,20 @@ impl Raft { meta.get_term() ); - let nodes = meta.get_conf_state().get_nodes(); - let learners = meta.get_conf_state().get_learners(); - self.prs = Some(ProgressSet::with_capacity(nodes.len(), learners.len())); - - for &(is_learner, nodes) in &[(false, nodes), (true, learners)] { - for &n in nodes { - let next_index = self.raft_log.last_index() + 1; - let mut matched = 0; - if n == self.id { - matched = next_index - 1; - self.is_learner = is_learner; - } - self.set_progress(n,
matched, next_index, is_learner); - info!( - "{} restored progress of {} [{:?}]", - self.tag, - n, - self.prs().get(n) - ); - } + let next_idx = self.raft_log.last_index() + 1; + let mut prs = ProgressSet::restore_snapmeta(meta, next_idx, self.max_inflight); + prs.get_mut(self.id).unwrap().matched = next_idx - 1; + if self.is_learner && prs.configuration().voters().contains(&self.id) { + self.is_learner = false; + } + self.prs = Some(prs); + if meta.get_pending_membership_change_index() > 0 { + let cs = meta.get_pending_membership_change().clone(); + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::BeginMembershipChange); + conf_change.set_configuration(cs); + conf_change.set_start_index(meta.get_pending_membership_change_index()); + self.pending_membership_change = Some(conf_change); } None } @@ -1816,7 +2058,7 @@ impl Raft { /// This method can be false positive. #[inline] pub fn has_pending_conf(&self) -> bool { - self.pending_conf_index > self.raft_log.applied + self.pending_conf_index > self.raft_log.applied || self.pending_membership_change.is_some() } /// Specifies if the commit should be broadcast. @@ -1830,6 +2072,70 @@ impl Raft { self.prs().voter_ids().contains(&self.id) } + /// Propose that the peer group change its active set to a new set. + /// + /// > **Note:** This is an experimental feature. + /// + /// ```rust + /// use raft::{Raft, Config, storage::MemStorage, eraftpb::ConfState}; + /// let config = Config { + /// id: 1, + /// peers: vec![1], + /// ..Default::default() + /// }; + /// let mut raft = Raft::new(&config, MemStorage::default()).unwrap(); + /// raft.become_candidate(); + /// raft.become_leader(); // It must be a leader! + /// + /// let mut conf = ConfState::default(); + /// conf.set_nodes(vec![1,2,3]); + /// conf.set_learners(vec![4]); + /// if let Err(e) = raft.propose_membership_change(conf) { + /// panic!("{}", e); + /// } + /// ``` + /// + /// # Errors + /// + /// * This Peer is not leader. 
+ /// * `voters` and `learners` are not mutually exclusive. + /// * `voters` is empty. + pub fn propose_membership_change(&mut self, config: impl Into) -> Result<()> { + if self.state != StateRole::Leader { + return Err(Error::InvalidState(self.state)); + } + let config = config.into(); + config.valid()?; + debug!( + "Replicating SetNodes with voters ({:?}), learners ({:?}).", + config.voters(), + config.learners() + ); + let destination_index = self.raft_log.last_index() + 1; + // Prep a configuration change to append. + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::BeginMembershipChange); + conf_change.set_configuration(config.into()); + conf_change.set_start_index(destination_index); + let data = protobuf::Message::write_to_bytes(&conf_change)?; + let mut entry = Entry::new(); + entry.set_entry_type(EntryType::EntryConfChange); + entry.set_data(data); + let mut message = Message::new(); + message.set_msg_type(MessageType::MsgPropose); + message.set_from(self.id); + message.set_index(destination_index); + message.set_entries(RepeatedField::from_vec(vec![entry])); + // `append_entry` sets term, index for us. + self.step(message)?; + Ok(()) + } + + /// # Errors + /// + /// * `id` is already a voter. + /// * `id` is already a learner. + /// * There is a pending membership change. (See `is_in_membership_change()`) fn add_voter_or_learner(&mut self, id: u64, learner: bool) -> Result<()> { debug!( "Adding node (learner: {}) with ID {} to peers.", @@ -1861,24 +2167,39 @@ impl Raft { } /// Adds a new node to the cluster. - // TODO: Return an error on a redundant insert. - pub fn add_node(&mut self, id: u64) { - self.add_voter_or_learner(id, false).ok(); + /// + /// # Errors + /// + /// * `id` is already a voter. + /// * `id` is already a learner. + /// * There is a pending membership change. 
(See `is_in_membership_change()`) + pub fn add_node(&mut self, id: u64) -> Result<()> { + self.add_voter_or_learner(id, false) } /// Adds a learner node. - // TODO: Return an error on a redundant insert. - pub fn add_learner(&mut self, id: u64) { - self.add_voter_or_learner(id, true).ok(); + /// + /// # Errors + /// + /// * `id` is already a voter. + /// * `id` is already a learner. + /// * There is a pending membership change. (See `is_in_membership_change()`) + pub fn add_learner(&mut self, id: u64) -> Result<()> { + self.add_voter_or_learner(id, true) } /// Removes a node from the raft. - pub fn remove_node(&mut self, id: u64) { - self.mut_prs().remove(id); + /// + /// # Errors + /// + /// * `id` is not a voter or learner. + /// * There is a pending membership change. (See `is_in_membership_change()`) + pub fn remove_node(&mut self, id: u64) -> Result<()> { + self.mut_prs().remove(id)?; // do not try to commit or abort transferring if there are no nodes in the cluster. if self.prs().voter_ids().is_empty() && self.prs().learner_ids().is_empty() { - return; + return Ok(()); } // The quorum size is now smaller, so see if any pending entries can @@ -1888,8 +2209,10 @@ impl Raft { } // If the removed node is the lead_transferee, then abort the leadership transferring. if self.state == StateRole::Leader && self.lead_transferee == Some(id) { - self.abort_leader_transfer() + self.abort_leader_transfer(); } + + Ok(()) } /// Updates the progress of the learner or voter. @@ -1982,4 +2305,9 @@ impl Raft { pub fn abort_leader_transfer(&mut self) { self.lead_transferee = None; } + + /// Determine if the Raft is in a transition state under Joint Consensus. + pub fn is_in_membership_change(&self) -> bool { + self.prs().is_in_membership_change() + } } diff --git a/src/raft_log.rs b/src/raft_log.rs index a9529fd88..b348f57a2 100644 --- a/src/raft_log.rs +++ b/src/raft_log.rs @@ -260,6 +260,8 @@ impl RaftLog { /// # Panics /// /// Panics if the value passed in is not new or known. 
+ #[deprecated = "Call raft::commit_apply(idx) instead. Joint Consensus requires an on-apply hook to + finalize a configuration change. This will become internal API in future versions."] pub fn applied_to(&mut self, idx: u64) { if idx == 0 { return; @@ -295,6 +297,11 @@ impl RaftLog { /// Appends a set of entries to the unstable list. pub fn append(&mut self, ents: &[Entry]) -> u64 { + trace!( + "{} Entries being appended to unstable list: {:?}", + self.tag, + ents + ); if ents.is_empty() { return self.last_index(); } @@ -417,6 +424,7 @@ impl RaftLog { /// Attempts to commit the index and term and returns whether it did. pub fn maybe_commit(&mut self, max_index: u64, term: u64) -> bool { if max_index > self.committed && self.term(max_index).unwrap_or(0) == term { + debug!("Committing index {}", max_index); self.commit_to(max_index); true } else { @@ -667,6 +675,7 @@ mod test { "maybe_commit return false" ); let committed = raft_log.committed; + #[allow(deprecated)] raft_log.applied_to(committed); let offset = 500u64; raft_log.store.wl().compact(offset).expect("compact failed"); @@ -916,6 +925,7 @@ mod test { let mut raft_log = new_raft_log(store); raft_log.append(&ents); raft_log.maybe_commit(5, 1); + #[allow(deprecated)] raft_log.applied_to(applied); let next_entries = raft_log.next_entries(); @@ -940,6 +950,7 @@ mod test { let mut raft_log = new_raft_log(store); raft_log.append(&ents); raft_log.maybe_commit(5, 1); + #[allow(deprecated)] raft_log.applied_to(applied); let actual_has_next = raft_log.has_next_entries(); @@ -1318,6 +1329,7 @@ mod test { let mut raft_log = new_raft_log(store); raft_log.maybe_commit(last_index, 0); let committed = raft_log.committed; + #[allow(deprecated)] raft_log.applied_to(committed); for (j, idx) in compact.into_iter().enumerate() { diff --git a/src/raw_node.rs b/src/raw_node.rs index c31c4f5c5..faef450c2 100644 --- a/src/raw_node.rs +++ b/src/raw_node.rs @@ -251,7 +251,7 @@ impl RawNode { rn.raft.raft_log.append(&ents); 
rn.raft.raft_log.committed = ents.len() as u64; for peer in peers { - rn.raft.add_node(peer.id); + rn.raft.add_node(peer.id)?; } } rn.prev_ss = rn.raft.soft_state(); @@ -287,7 +287,7 @@ impl RawNode { } fn commit_apply(&mut self, applied: u64) { - self.raft.raft_log.applied_to(applied); + self.raft.commit_apply(applied); } /// Tick advances the internal logical clock by a single tick. @@ -332,23 +332,34 @@ impl RawNode { } /// Takes the conf change and applies it. - pub fn apply_conf_change(&mut self, cc: &ConfChange) -> ConfState { - if cc.get_node_id() == INVALID_ID { + /// + /// # Errors + /// + /// Returns any error produced while applying the change, including from `BeginMembershipChange` or `FinalizeMembershipChange`. + /// + /// To apply these two variants directly, call `this.raft.begin_membership_change(entry)` or + /// `this.raft.finalize_membership_change(entry)` respectively. + pub fn apply_conf_change(&mut self, cc: &ConfChange) -> Result<ConfState> { + if cc.get_node_id() == INVALID_ID + && cc.get_change_type() != ConfChangeType::BeginMembershipChange + { let mut cs = ConfState::new(); cs.set_nodes(self.raft.prs().voter_ids().iter().cloned().collect()); cs.set_learners(self.raft.prs().learner_ids().iter().cloned().collect()); - return cs; + return Ok(cs); } let nid = cc.get_node_id(); match cc.get_change_type() { - ConfChangeType::AddNode => self.raft.add_node(nid), - ConfChangeType::AddLearnerNode => self.raft.add_learner(nid), - ConfChangeType::RemoveNode => self.raft.remove_node(nid), - } - let mut cs = ConfState::new(); - cs.set_nodes(self.raft.prs().voter_ids().iter().cloned().collect()); - cs.set_learners(self.raft.prs().learner_ids().iter().cloned().collect()); - cs + ConfChangeType::AddNode => self.raft.add_node(nid)?, + ConfChangeType::AddLearnerNode => self.raft.add_learner(nid)?, + ConfChangeType::RemoveNode => self.raft.remove_node(nid)?, + ConfChangeType::BeginMembershipChange => self.raft.begin_membership_change(cc)?, + ConfChangeType::FinalizeMembershipChange => { +
self.raft.mut_prs().finalize_membership_change()? + } + }; + + Ok(self.raft.prs().configuration().clone().into()) } /// Step advances the state machine using the given message. @@ -409,6 +420,7 @@ impl RawNode { /// HasReady called when RawNode user need to check if any Ready pending. /// Checking logic in this method should be consistent with Ready.containsUpdates(). + #[inline] pub fn has_ready(&self) -> bool { self.has_ready_since(None) } @@ -423,7 +435,6 @@ impl RawNode { /// last Ready results. pub fn advance(&mut self, rd: Ready) { self.advance_append(rd); - let commit_idx = self.prev_hs.get_commit(); if commit_idx != 0 { // In most cases, prevHardSt and rd.HardState will be the same @@ -440,16 +451,19 @@ impl RawNode { } /// Appends and commits the ready value. + #[inline] pub fn advance_append(&mut self, rd: Ready) { self.commit_ready(rd); } /// Advance apply to the passed index. + #[inline] pub fn advance_apply(&mut self, applied: u64) { self.commit_apply(applied); } /// Status returns the current status of the given group. + #[inline] pub fn status(&self) -> Status { Status::new(&self.raft) } diff --git a/src/storage.rs b/src/storage.rs index 4e8a10761..67df549de 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -33,19 +33,30 @@ use std::sync::{Arc, RwLock, RwLockReadGuard, RwLockWriteGuard}; -use eraftpb::{ConfState, Entry, HardState, Snapshot}; +use eraftpb::{ConfChange, ConfState, Entry, HardState, Snapshot}; use errors::{Error, Result, StorageError}; use util; /// Holds both the hard state (commit index, vote leader, term) and the configuration state /// (Current node IDs) -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Getters, Setters)] pub struct RaftState { /// Contains the last meta information including commit index, the vote leader, and the vote term. pub hard_state: HardState, /// Records the current node IDs like `[1, 2, 3]` in the cluster. 
Every Raft node must have a unique ID in the cluster; pub conf_state: ConfState, + /// If this peer is in the middle of a membership change (The period between + /// `BeginMembershipChange` and `FinalizeMembershipChange`) this will hold the final desired + /// state. + #[get = "pub"] + #[set] + pending_conf_state: Option<ConfState>, + /// If `pending_conf_state` exists this will contain the index of the `BeginMembershipChange` + /// entry. + #[get = "pub"] + #[set] + pending_conf_state_start_index: Option<u64>, } /// Storage saves all the information about the current Raft implementation, including Raft Log, commit index, the leader to vote for, etc. @@ -108,6 +119,23 @@ impl MemStorageCore { self.hard_state = hs; } + /// Saves the current conf state. + pub fn set_conf_state( + &mut self, + cs: ConfState, + pending_membership_change: Option<(ConfState, u64)>, + ) { + self.snapshot.mut_metadata().set_conf_state(cs); + if let Some((cs, idx)) = pending_membership_change { + self.snapshot + .mut_metadata() + .set_pending_membership_change(cs); + self.snapshot + .mut_metadata() + .set_pending_membership_change_index(idx); + } + } + fn inner_last_index(&self) -> u64 { self.entries[0].get_index() + self.entries.len() as u64 - 1 } @@ -137,6 +165,7 @@ impl MemStorageCore { &mut self, idx: u64, cs: Option<ConfState>, + pending_membership_change: Option<ConfChange>, data: Vec<u8>, ) -> Result<&Snapshot> { if idx <= self.snapshot.get_metadata().get_index() { @@ -158,6 +187,11 @@ impl MemStorageCore { if let Some(cs) = cs { self.snapshot.mut_metadata().set_conf_state(cs) } + if let Some(pending_change) = pending_membership_change { + let meta = self.snapshot.mut_metadata(); + meta.set_pending_membership_change(pending_change.get_configuration().clone()); + meta.set_pending_membership_change_index(pending_change.get_start_index()); + } self.snapshot.set_data(data); Ok(&self.snapshot) } @@ -257,10 +291,26 @@ impl Storage for MemStorage { /// Implements the Storage trait.
fn initial_state(&self) -> Result<RaftState> { let core = self.rl(); - Ok(RaftState { + let mut state = RaftState { hard_state: core.hard_state.clone(), conf_state: core.snapshot.get_metadata().get_conf_state().clone(), - }) + pending_conf_state: None, + pending_conf_state_start_index: None, + }; + if core.snapshot.get_metadata().has_pending_membership_change() { + state.pending_conf_state = core + .snapshot + .get_metadata() + .get_pending_membership_change() + .clone() + .into(); + state.pending_conf_state_start_index = core + .snapshot + .get_metadata() + .get_pending_membership_change_index() + .into(); + } + Ok(state) } /// Implements the Storage trait. @@ -272,7 +322,11 @@ impl Storage for MemStorage { } if high > core.inner_last_index() + 1 { - panic!("index out of bound") + panic!( + "index out of bound (last: {}, high: {})", + core.inner_last_index() + 1, + high + ); } // only contains dummy entries. if core.entries.len() == 1 { @@ -539,7 +593,7 @@ mod test { storage .wl() - .create_snapshot(idx, Some(cs.clone()), data.clone()) + .create_snapshot(idx, Some(cs.clone()), None, data.clone()) .expect("create snapshot failed"); let result = storage.snapshot(); if result != wresult { diff --git a/src/util.rs b/src/util.rs index f74a76390..cfddf2ea6 100644 --- a/src/util.rs +++ b/src/util.rs @@ -16,6 +16,7 @@ use std::u64; +use eraftpb::{ConfChange, ConfChangeType, ConfState}; use protobuf::Message; /// A number to represent that there is no limit. @@ -69,3 +70,22 @@ pub fn limit_size(entries: &mut Vec, max: u64) { entries.truncate(limit); } + +// Bring some consistency to things. The protobuf has `nodes` and it's not really a term that's used anymore. +impl ConfState { + /// Get the voters. This is identical to `get_nodes()`.
+ #[inline] + pub fn get_voters(&self) -> &[u64] { + self.get_nodes() + } +} + +impl From<(u64, ConfState)> for ConfChange { + fn from((start_index, state): (u64, ConfState)) -> Self { + let mut change = ConfChange::new(); + change.set_change_type(ConfChangeType::BeginMembershipChange); + change.set_configuration(state); + change.set_start_index(start_index); + change + } +} diff --git a/tests/integration_cases/mod.rs b/tests/integration_cases/mod.rs index 1bed08b3f..da64313c5 100644 --- a/tests/integration_cases/mod.rs +++ b/tests/integration_cases/mod.rs @@ -11,6 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod test_membership_changes; mod test_raft; mod test_raft_flow_control; mod test_raft_paper; diff --git a/tests/integration_cases/test_membership_changes.rs b/tests/integration_cases/test_membership_changes.rs new file mode 100644 index 000000000..d23ac17c3 --- /dev/null +++ b/tests/integration_cases/test_membership_changes.rs @@ -0,0 +1,1689 @@ +// Copyright 2019 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. +// +// +use hashbrown::{HashMap, HashSet}; +use protobuf::{self, RepeatedField}; +use raft::{ + eraftpb::{ + ConfChange, ConfChangeType, ConfState, Entry, EntryType, Message, MessageType, Snapshot, + }, + storage::MemStorage, + Config, Configuration, Raft, Result, INVALID_ID, NO_LIMIT, +}; +use std::ops::{Deref, DerefMut}; +use test_util::{new_message, setup_for_test, Network}; + +// Test that the API itself works. 
+// +// * Errors are returned from misuse. +// * Happy path returns happy values. +mod api { + use super::*; + // Test that the cluster can transition from a single node to a whole cluster. + #[test] + fn can_transition() -> Result<()> { + setup_for_test(); + let mut raft = Raft::new( + &Config { + id: 1, + tag: "1".into(), + peers: vec![1], + learners: vec![], + ..Default::default() + }, + MemStorage::new(), + )?; + let begin_conf_change = begin_conf_change(&[1, 2, 3], &[4], raft.raft_log.last_index() + 1); + raft.begin_membership_change(&begin_conf_change)?; + let finalize_conf_change = finalize_conf_change(); + raft.finalize_membership_change(&finalize_conf_change)?; + Ok(()) + } + + // Test if the process rejects an overlapping voter and learner set. + #[test] + fn checks_for_overlapping_membership() -> Result<()> { + setup_for_test(); + let mut raft = Raft::new( + &Config { + id: 1, + tag: "1".into(), + peers: vec![1], + learners: vec![], + ..Default::default() + }, + MemStorage::new(), + )?; + let begin_conf_change = + begin_conf_change(&[1, 2, 3], &[1, 2, 3], raft.raft_log.last_index() + 1); + assert!(raft.begin_membership_change(&begin_conf_change).is_err()); + Ok(()) + } + + // Test if the process rejects a voter demotion. + #[test] + fn checks_for_voter_demotion() -> Result<()> { + setup_for_test(); + let mut raft = Raft::new( + &Config { + id: 1, + tag: "1".into(), + peers: vec![1, 2, 3], + learners: vec![4], + ..Default::default() + }, + MemStorage::new(), + )?; + let begin_conf_change = begin_conf_change(&[1, 2], &[3, 4], raft.raft_log.last_index() + 1); + assert!(raft.begin_membership_change(&begin_conf_change).is_err()); + Ok(()) + } + + // Test that finalizing before a membership change has begun fails gracefully.
+ #[test] + fn finalize_before_begin_fails_gracefully() -> Result<()> { + setup_for_test(); + let mut raft = Raft::new( + &Config { + id: 1, + tag: "1".into(), + peers: vec![1, 2, 3], + learners: vec![4], + ..Default::default() + }, + MemStorage::new(), + )?; + let finalize_conf_change = finalize_conf_change(); + assert!(raft + .finalize_membership_change(&finalize_conf_change) + .is_err()); + Ok(()) + } +} + +// Test that small cluster is able to progress through adding a voter. +mod three_peers_add_voter { + use super::*; + + /// In a steady state transition should proceed without issue. + #[test] + fn stable() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 3, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[3, 2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 
3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +// Test that small cluster is able to progress through adding a learner. +mod three_peers_add_learner { + use super::*; + + /// In a steady state transition should proceed without issue. + #[test] + fn stable() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 3], vec![4]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[3, 2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +// Test that small cluster is able to progress through removing a learner. 
+mod remove_learner { + use super::*; + + /// In a steady state transition should proceed without issue. + #[test] + fn stable() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![4]); + let new_configuration = (vec![1, 2, 3], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3, 4])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3, 4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[4, 3, 2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +// Test that small cluster is able to progress through removing a voter. +mod remove_voter { + use super::*; + + /// In a steady state transition should proceed without issue. 
+ #[test] + fn stable() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2]); + + Ok(()) + } +} + +// Test that small cluster is able to progress through removing a leader. +mod remove_leader { + use super::*; + + /// In a steady state transition should proceed without issue. 
+ #[test] + fn stable() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![2, 3], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration.clone())?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[2, 3, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3]); + let peer_leaders = scenario.peer_leaders(); + for id in 1..=3 { + assert_eq!(peer_leaders[&id], INVALID_ID, "peer {}", id); + } + + info!("Prompting a new election."); + { + let new_leader = scenario.peers.get_mut(&2).unwrap(); + for _ in + new_leader.election_elapsed..=(new_leader.get_randomized_election_timeout() + 1) + { + new_leader.tick(); + } + } + let messages = scenario.read_messages(); + scenario.send(messages); + + info!("Verifying that all peers have the right peer group."); + for (_, peer) in scenario.peers.iter() { + assert_eq!( + peer.prs().configuration(), + &new_configuration.clone().into(), + ); + } + + info!("Verifying that old leader cannot disrupt the cluster."); + { + let old_leader = scenario.peers.get_mut(&1).unwrap(); + for _ in 
old_leader.get_heartbeat_elapsed()..=(old_leader.get_heartbeat_timeout() + 1) { + old_leader.tick(); + } + } + let messages = scenario.read_messages(); + scenario.send(messages); + Ok(()) + } + + /// If the leader fails after the `Begin`, then recovers after the `Finalize`, the group should ignore it. + #[test] + fn leader_fails_and_recovers() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![2, 3], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[2, 3, 1])?; + + scenario.isolate(1); // Simulate the leader failing. + + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[2, 3]); + + // At this point, 1 thinks it is a leader, but actually it isn't anymore. 
+ + info!("Prompting a new election."); + { + let new_leader = scenario.peers.get_mut(&2).unwrap(); + for _ in + new_leader.election_elapsed..=(new_leader.get_randomized_election_timeout() + 1) + { + new_leader.tick(); + } + } + let messages = scenario.read_messages(); + scenario.send(messages); + + scenario.recover(); + // Here we note that the old leader (1) has NOT applied the finalize operation and thus thinks it is still leader. + // + // The Raft paper notes that a removed leader should not disrupt the cluster. + // It suggests doing this by ignoring any `RequestVote` when it has heard from the leader within the minimum election timeout. + + info!("Verifying that old leader cannot disrupt the cluster."); + { + let old_leader = scenario.peers.get_mut(&1).unwrap(); + for _ in old_leader.get_heartbeat_elapsed()..=(old_leader.get_heartbeat_timeout() + 1) { + old_leader.tick(); + } + } + let messages = scenario.read_messages(); + scenario.send(messages); + + let peer_leader = scenario.peer_leaders(); + assert!(peer_leader[&2] != 1); + assert!(peer_leader[&3] != 1); + Ok(()) + } +} + +// Test that small cluster is able to progress through replacing a voter. +mod three_peers_replace_voter { + use super::*; + + /// In a steady state transition should proceed without issue. 
+    #[test]
+    fn stable() -> Result<()> {
+        setup_for_test();
+        // Both transition entry kinds are checked repeatedly below, so bind them once.
+        let begin = ConfChangeType::BeginMembershipChange;
+        let finalize = ConfChangeType::FinalizeMembershipChange;
+        // Voter 3 is replaced by voter 4; peer 1 is the predetermined leader.
+        let leader_id = 1;
+        let before = (vec![1, 2, 3], vec![]);
+        let after = (vec![1, 2, 4], vec![]);
+        let mut cluster = Scenario::new(leader_id, before, after)?;
+        cluster.spawn_new_peers()?;
+        cluster.propose_change_message()?;
+
+        info!("Allowing quorum to commit");
+        cluster.expect_read_and_dispatch_messages_from(&[1, 2, 3])?;
+
+        // The leader applies the begin entry (expected at index 2) before anyone else.
+        info!("Advancing leader, now entered the joint");
+        cluster.assert_can_apply_transition_entry_at_index(&[1], 2, begin);
+        cluster.assert_in_membership_change(&[1]);
+
+        info!("Leader replicates the commit and finalize entry.");
+        cluster.expect_read_and_dispatch_messages_from(&[1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[2], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2]);
+
+        info!("Allowing new peers to catch up.");
+        cluster.expect_read_and_dispatch_messages_from(&[4, 1, 4])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[4], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2, 4]);
+
+        // The finalize entry is expected at index 3 on every member of the new group.
+        info!("Cluster leaving the joint.");
+        cluster.expect_read_and_dispatch_messages_from(&[2, 1, 4])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[1, 2, 4], 3, finalize);
+        cluster.assert_not_in_membership_change(&[1, 2, 4]);
+
+        Ok(())
+    }
+
+    /// The leader power cycles before actually sending the messages.
+ #[test] + fn leader_power_cycles_no_compaction() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Leader power cycles."); + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + + if let Some(idx) = scenario.peers[&1].began_membership_change_at() { + let raft = scenario.peers.get_mut(&1).unwrap(); + let conf_state: ConfState = raft.prs().configuration().clone().into(); + let new_conf_state: ConfState = raft.prs().next_configuration().clone().unwrap().into(); + raft.mut_store() + .wl() + .set_conf_state(conf_state, Some((new_conf_state, idx))); + } + + scenario.power_cycle(&[1], None); + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + scenario.assert_in_membership_change(&[1]); + { + let peer = scenario.peers.get_mut(&1).unwrap(); + peer.become_candidate(); + peer.become_leader(); + for _ in peer.get_heartbeat_elapsed()..=(peer.get_heartbeat_timeout() + 1) { + peer.tick(); + } + } + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4, 1, 4, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 
2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[4, 3, 2, 1, 4, 3, 2, 1])?; + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 4, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 4]); + + Ok(()) + } + + /// The leader power cycles before actually sending the messages. + #[test] + fn leader_power_cycles_compacted_log() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Compacting leader's log"); + // This snapshot has a term 1. 
+ let snapshot = { + let peer = scenario.peers.get_mut(&1).unwrap(); + warn!("BLAH {:?}", peer.pending_membership_change().clone()); + peer.raft_log.store.wl().create_snapshot( + 2, + ConfState::from(peer.prs().configuration().clone()).into(), + peer.pending_membership_change().clone(), + vec![], + )?; + let snapshot = peer.raft_log.snapshot()?; + warn!("BLAH {:?}", snapshot.get_metadata()); + peer.raft_log.store.wl().compact(2)?; + snapshot + }; + + // At this point, there is a sentinel at index 3, term 2. + + info!("Leader power cycles."); + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + scenario.power_cycle(&[1], snapshot.clone()); + { + let peer = scenario.peers.get_mut(&1).unwrap(); + peer.become_candidate(); + peer.become_leader(); + } + + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + scenario.assert_in_membership_change(&[1]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[1, 4, 1])?; // 1, 4, 1, 4, 1])?; + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + { + assert_eq!( + 3, + scenario.peers.get_mut(&4).unwrap().raft_log.unstable.offset + ); + let new_peer = scenario.peers.get_mut(&4).unwrap(); + let snap = new_peer.raft_log.snapshot().unwrap(); + new_peer.raft_log.store.wl().apply_snapshot(snap).unwrap(); + new_peer + .raft_log + .stable_snap_to(snapshot.get_metadata().get_index()); + } + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4, 3, 2, 1, 3, 2, 1])?; + assert_eq!(scenario.peers[&1].began_membership_change_at(), Some(2)); + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 4, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } + + // Ensure if a peer in the old quorum fails, but the quorum is still big enough, it's ok. 
+    #[test]
+    fn pending_delete_fails_after_begin() -> Result<()> {
+        setup_for_test();
+        // Both transition entry kinds are checked repeatedly below, so bind them once.
+        let begin = ConfChangeType::BeginMembershipChange;
+        let finalize = ConfChangeType::FinalizeMembershipChange;
+        // Voter 3 is the peer pending deletion; voter 4 is the peer pending creation.
+        let leader_id = 1;
+        let before = (vec![1, 2, 3], vec![]);
+        let after = (vec![1, 2, 4], vec![]);
+        let mut cluster = Scenario::new(leader_id, before, after)?;
+        cluster.spawn_new_peers()?;
+        cluster.propose_change_message()?;
+
+        info!("Allowing quorum to commit");
+        cluster.expect_read_and_dispatch_messages_from(&[1, 2, 3])?;
+
+        info!("Advancing leader, now entered the joint");
+        cluster.assert_can_apply_transition_entry_at_index(&[1], 2, begin);
+        cluster.assert_in_membership_change(&[1]);
+
+        // Take 3 down: from here on the outgoing voter is cut off, so it is
+        // absent from every peer list asserted below.
+        cluster.isolate(3);
+
+        info!("Leader replicates the commit and finalize entry.");
+        cluster.expect_read_and_dispatch_messages_from(&[1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[2], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2]);
+
+        info!("Allowing new peers to catch up.");
+        cluster.expect_read_and_dispatch_messages_from(&[4, 1, 4])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[4], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2, 4]);
+
+        // The finalize entry is expected at index 3 on the three reachable peers.
+        info!("Cluster leaving the joint.");
+        cluster.expect_read_and_dispatch_messages_from(&[2, 1, 4])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[1, 2, 4], 3, finalize);
+        cluster.assert_not_in_membership_change(&[1, 2, 4]);
+
+        Ok(())
+    }
+
+    // Ensure if a peer in the new quorum fails, but the quorum is still big enough, it's ok.
+    #[test]
+    fn pending_create_with_quorum_fails_after_begin() -> Result<()> {
+        setup_for_test();
+        // Both transition entry kinds are checked repeatedly below, so bind them once.
+        let begin = ConfChangeType::BeginMembershipChange;
+        let finalize = ConfChangeType::FinalizeMembershipChange;
+        // Voter 3 is the peer pending deletion; voter 4 is the peer pending creation.
+        let leader_id = 1;
+        let before = (vec![1, 2, 3], vec![]);
+        let after = (vec![1, 2, 4], vec![]);
+        let mut cluster = Scenario::new(leader_id, before, after)?;
+        cluster.spawn_new_peers()?;
+        cluster.propose_change_message()?;
+
+        info!("Allowing quorum to commit");
+        cluster.expect_read_and_dispatch_messages_from(&[1, 2, 3])?;
+
+        info!("Advancing leader, now entered the joint");
+        cluster.assert_can_apply_transition_entry_at_index(&[1], 2, begin);
+        cluster.assert_in_membership_change(&[1]);
+
+        // Take 4 down: the incoming voter is cut off, so only the original
+        // three peers appear in the assertions below.
+        cluster.isolate(4);
+
+        info!("Leader replicates the commit and finalize entry.");
+        cluster.expect_read_and_dispatch_messages_from(&[1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[2, 3], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2, 3]);
+
+        // The finalize entry is expected at index 3 on peers 1, 2 and 3.
+        info!("Cluster leaving the joint.");
+        cluster.expect_read_and_dispatch_messages_from(&[2, 1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[1, 2, 3], 3, finalize);
+        cluster.assert_not_in_membership_change(&[1, 2, 3]);
+
+        Ok(())
+    }
+
+    // Ensure if the peer pending a deletion and the peer pending a creation both fail it's still ok (so long as both quorums hold).
+    #[test]
+    fn pending_create_and_destroy_both_fail() -> Result<()> {
+        setup_for_test();
+        // Both transition entry kinds are checked repeatedly below, so bind them once.
+        let begin = ConfChangeType::BeginMembershipChange;
+        let finalize = ConfChangeType::FinalizeMembershipChange;
+        // Voter 3 is the peer pending deletion; voter 4 is the peer pending creation.
+        let leader_id = 1;
+        let before = (vec![1, 2, 3], vec![]);
+        let after = (vec![1, 2, 4], vec![]);
+        let mut cluster = Scenario::new(leader_id, before, after)?;
+        cluster.spawn_new_peers()?;
+        cluster.propose_change_message()?;
+
+        info!("Allowing quorum to commit");
+        cluster.expect_read_and_dispatch_messages_from(&[1, 2, 3])?;
+
+        info!("Advancing leader, now entered the joint");
+        cluster.assert_can_apply_transition_entry_at_index(&[1], 2, begin);
+        cluster.assert_in_membership_change(&[1]);
+
+        // Take 3 and then 4 down: only peers 1 and 2, which belong to both
+        // the old and the new voter sets, stay reachable.
+        for failed in [3, 4].iter().cloned() {
+            cluster.isolate(failed);
+        }
+
+        info!("Leader replicates the commit and finalize entry.");
+        cluster.expect_read_and_dispatch_messages_from(&[1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[2], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2]);
+
+        // The finalize entry is expected at index 3 on the two surviving peers.
+        info!("Cluster leaving the joint.");
+        cluster.expect_read_and_dispatch_messages_from(&[2, 1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[1, 2], 3, finalize);
+        cluster.assert_not_in_membership_change(&[1, 2]);
+
+        Ok(())
+    }
+
+    // Ensure if the old quorum fails during the joint state progress will halt until the peer group is recovered.
+ #[test] + fn old_quorum_fails() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Old quorum fails."); + scenario.isolate(3); // Take 3 down. + scenario.isolate(2); // Take 2 down. + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1, 4, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 4]); + scenario.assert_not_in_membership_change(&[2, 3]); + + info!("Spinning for awhile to ensure nothing spectacular happens"); + for _ in scenario.peers[&leader].get_heartbeat_elapsed() + ..=scenario.peers[&leader].get_heartbeat_timeout() + { + scenario.peers.iter_mut().for_each(|(_, peer)| { + peer.tick(); + }); + let messages = scenario.read_messages(); + scenario.dispatch(messages)?; + } + + scenario.assert_in_membership_change(&[1, 4]); + scenario.assert_not_in_membership_change(&[2, 3]); + + info!("Recovering old qourum."); + scenario.recover(); + + for _ in scenario.peers[&leader].get_heartbeat_elapsed() + ..=scenario.peers[&leader].get_heartbeat_timeout() + { + scenario.peers.iter_mut().for_each(|(_, peer)| { + peer.tick(); + }); + } + + info!("Giving the peer group time to recover."); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3, 4, 1, 2, 3, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + 
&[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Failed peers confirming they have commited the begin."); + scenario.expect_read_and_dispatch_messages_from(&[2, 3])?; + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } + + // Ensure if the new quorum fails during the joint state progress will halt until the peer group is recovered. + #[test] + fn new_quorum_fails() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("New quorum fails."); + scenario.isolate(4); // Take 4 down. + scenario.isolate(2); // Take 2 down. 
+ + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1, 3])?; + + info!("Leader waits to let the new quorum apply this before progressing."); + scenario.assert_in_membership_change(&[1]); + scenario.assert_not_in_membership_change(&[2, 3, 4]); + + info!("Spinning for awhile to ensure nothing spectacular happens"); + for _ in scenario.peers[&leader].get_heartbeat_elapsed() + ..=scenario.peers[&leader].get_heartbeat_timeout() + { + scenario.peers.iter_mut().for_each(|(_, peer)| { + peer.tick(); + }); + let messages = scenario.read_messages(); + scenario.dispatch(messages)?; + } + + scenario.assert_in_membership_change(&[1]); + scenario.assert_not_in_membership_change(&[2, 3, 4]); + + info!("Recovering new qourum."); + scenario.recover(); + + for _ in scenario.peers[&leader].get_heartbeat_elapsed() + ..=scenario.peers[&leader].get_heartbeat_timeout() + { + scenario.peers.iter_mut().for_each(|(_, peer)| { + peer.tick(); + }); + } + + info!("Giving the peer group time to recover."); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3, 4, 1, 2, 4, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3, 4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Failed peers confirming they have commited the begin."); + scenario.expect_read_and_dispatch_messages_from(&[2, 4])?; + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +// Test that small cluster is able to progress through adding a more with a learner. +mod three_peers_to_five_with_learner { + use super::*; + + /// In a steady state transition should proceed without issue. 
+    #[test]
+    fn stable() -> Result<()> {
+        setup_for_test();
+        // Both transition entry kinds are checked repeatedly below, so bind them once.
+        let begin = ConfChangeType::BeginMembershipChange;
+        let finalize = ConfChangeType::FinalizeMembershipChange;
+        // The group grows from voters 1-3 to voters 1-5 plus learner 6.
+        let leader_id = 1;
+        let before = (vec![1, 2, 3], vec![]);
+        let after = (vec![1, 2, 3, 4, 5], vec![6]);
+        let mut cluster = Scenario::new(leader_id, before, after)?;
+        cluster.spawn_new_peers()?;
+        cluster.propose_change_message()?;
+
+        info!("Allowing quorum to commit");
+        cluster.expect_read_and_dispatch_messages_from(&[1, 2, 3])?;
+
+        // The leader applies the begin entry (expected at index 2) before anyone else.
+        info!("Advancing leader, now entered the joint");
+        cluster.assert_can_apply_transition_entry_at_index(&[1], 2, begin);
+        cluster.assert_in_membership_change(&[1]);
+
+        info!("Leader replicates the commit and finalize entry.");
+        cluster.expect_read_and_dispatch_messages_from(&[1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[2, 3], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2, 3]);
+
+        info!("Allowing new peers to catch up.");
+        cluster.expect_read_and_dispatch_messages_from(&[4, 5, 6, 1, 4, 5, 6])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[4, 5, 6], 2, begin);
+        cluster.assert_in_membership_change(&[1, 2, 3, 4, 5, 6]);
+
+        // The finalize entry is expected at index 3 on all six peers, learner included.
+        info!("Cluster leaving the joint.");
+        cluster.expect_read_and_dispatch_messages_from(&[3, 2, 1])?;
+        cluster.assert_can_apply_transition_entry_at_index(&[1, 2, 3, 4, 5, 6], 3, finalize);
+        cluster.assert_not_in_membership_change(&[1, 2, 3, 4, 5, 6]);
+
+        Ok(())
+    }
+
+    /// In this, a single node (of 3) halts during the transition.
+ #[test] + fn minority_old_followers_halt_at_start() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 3, 4, 5], vec![6]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.isolate(3); + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2]); + scenario.assert_not_in_membership_change(&[3]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 5, 6, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4, 5, 6], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 4, 5, 6]); + scenario.assert_not_in_membership_change(&[3]); + + scenario.expect_read_and_dispatch_messages_from(&[4, 5, 6])?; + + info!("Cluster leaving the joint."); + { + let leader = scenario.peers.get_mut(&1).unwrap(); + let ticks = leader.get_heartbeat_timeout(); + for _ in 0..=ticks { + leader.tick(); + } + } + scenario.expect_read_and_dispatch_messages_from(&[2, 1, 4, 5, 6, 1, 4, 5, 6, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 4, 5], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 4, 5]); + scenario.assert_not_in_membership_change(&[3]); + + Ok(()) + } +} + +mod 
intermingled_config_changes { + use super::*; + + // In this test, we make sure that if the peer group is sent a `BeginMembershipChange`, then immediately a `AddNode` entry, that the `AddNode` is rejected by the leader. + #[test] + fn begin_then_add_node() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 3, 4], vec![]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader recieves an add node proposal, which it rejects since it is already in transition."); + scenario.propose_add_node_message(4).is_err(); + assert_eq!( + scenario.peers[&scenario.old_leader] + .raft_log + .entries(4, 1) + .unwrap()[0] + .get_entry_type(), + EntryType::EntryNormal + ); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[3, 2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + 
scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +mod compaction { + use super::*; + + // Ensure that if a Raft compacts its log before finalizing that there are no failures. + #[test] + fn begin_compact_then_finalize() -> Result<()> { + setup_for_test(); + let leader = 1; + let old_configuration = (vec![1, 2, 3], vec![]); + let new_configuration = (vec![1, 2, 3], vec![4]); + let mut scenario = Scenario::new(leader, old_configuration, new_configuration)?; + scenario.spawn_new_peers()?; + scenario.propose_change_message()?; + + info!("Allowing quorum to commit"); + scenario.expect_read_and_dispatch_messages_from(&[1, 2, 3])?; + + info!("Advancing leader, now entered the joint"); + scenario.assert_can_apply_transition_entry_at_index( + &[1], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1]); + + info!("Leader replicates the commit and finalize entry."); + scenario.expect_read_and_dispatch_messages_from(&[1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[2, 3], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3]); + + info!("Allowing new peers to catch up."); + scenario.expect_read_and_dispatch_messages_from(&[4, 1, 4])?; + scenario.assert_can_apply_transition_entry_at_index( + &[4], + 2, + ConfChangeType::BeginMembershipChange, + ); + scenario.assert_in_membership_change(&[1, 2, 3, 4]); + + info!("Compacting the leaders log"); + scenario + .peers + .get_mut(&1) + .unwrap() + .raft_log + .store + .wl() + .compact(2)?; + + info!("Cluster leaving the joint."); + scenario.expect_read_and_dispatch_messages_from(&[3, 2, 1])?; + scenario.assert_can_apply_transition_entry_at_index( + &[1, 2, 3, 4], + 3, + ConfChangeType::FinalizeMembershipChange, + ); + scenario.assert_not_in_membership_change(&[1, 2, 3, 4]); + + Ok(()) + } +} + +/// A test harness providing some useful utility and shorthand functions appropriate for this test suite. 
+/// +/// Since it derefs into `Network` it can be used the same way. So it acts as a transparent set of utilities over the standard `Network`. +/// The goal here is to boil down the test suite for Joint Consensus into the simplest terms possible, while allowing for control. +struct Scenario { + old_configuration: Configuration, + old_leader: u64, + new_configuration: Configuration, + network: Network, +} +impl Deref for Scenario { + type Target = Network; + fn deref(&self) -> &Network { + &self.network + } +} +impl DerefMut for Scenario { + fn deref_mut(&mut self) -> &mut Network { + &mut self.network + } +} + +// TODO: Explore moving some functionality to `Network`. +impl Scenario { + /// Create a new scenario with the given state. + fn new( + leader: u64, + old_configuration: impl Into, + new_configuration: impl Into, + ) -> Result { + let old_configuration = old_configuration.into(); + let new_configuration = new_configuration.into(); + info!( + "Beginning scenario, old: {:?}, new: {:?}", + old_configuration, new_configuration + ); + let starting_peers = old_configuration + .voters() + .iter() + .chain(old_configuration.learners().iter()) + .map(|&id| { + Some( + Raft::new( + &Config { + id, + peers: old_configuration.voters().iter().cloned().collect(), + learners: old_configuration.learners().iter().cloned().collect(), + tag: format!("{}", id), + ..Default::default() + }, + MemStorage::new(), + ) + .unwrap() + .into(), + ) + }) + .collect(); + let mut scenario = Scenario { + old_leader: leader, + old_configuration, + new_configuration, + network: Network::new(starting_peers), + }; + // Elect the leader. + info!("Sending MsgHup to predetermined leader ({})", leader); + let message = new_message(leader, leader, MessageType::MsgHup, 0); + scenario.send(vec![message]); + Ok(scenario) + } + + /// Creates any peers which are pending creation. + /// + /// This *only* creates the peers and adds them to the `Network`. It does not take other + /// action. 
Newly created peers are only aware of the leader and themself. + fn spawn_new_peers(&mut self) -> Result<()> { + let storage = MemStorage::new(); + let new_peers = self.new_peers(); + info!("Creating new peers. {:?}", new_peers); + for &id in new_peers.voters() { + let raft = Raft::new( + &Config { + id, + peers: vec![self.old_leader, id], + learners: vec![], + tag: format!("{}", id), + ..Default::default() + }, + storage.clone(), + )?; + self.peers.insert(id, raft.into()); + } + for &id in new_peers.learners() { + let raft = Raft::new( + &Config { + id, + peers: vec![self.old_leader], + learners: vec![id], + tag: format!("{}", id), + ..Default::default() + }, + storage.clone(), + )?; + self.peers.insert(id, raft.into()); + } + Ok(()) + } + + /// Return the leader id according to each peer. + fn peer_leaders(&self) -> HashMap { + self.peers + .iter() + .map(|(&id, peer)| (id, peer.leader_id)) + .collect() + } + + /// Return a configuration containing only the peers pending creation. + fn new_peers(&self) -> Configuration { + let all_old = self + .old_configuration + .voters() + .union(&self.old_configuration.learners()) + .cloned() + .collect::>(); + Configuration::new( + self.new_configuration + .voters() + .difference(&all_old) + .cloned(), + self.new_configuration + .learners() + .difference(&all_old) + .cloned(), + ) + } + + /// Send a message proposing a "one-by-one" style AddNode configuration. + /// If the peers are in the midst joint consensus style (Begin/FinalizeMembershipChange) change they should reject it. + fn propose_add_node_message(&mut self, id: u64) -> Result<()> { + info!("Proposing add_node message. Target: {:?}", id,); + let message = build_propose_add_node_message( + self.old_leader, + id, + self.peers[&id].raft_log.last_index() + 1, + ); + self.dispatch(vec![message]) + } + + /// Send the message which proposes the configuration change. + fn propose_change_message(&mut self) -> Result<()> { + info!( + "Proposing change message. 
Target: {:?}", + self.new_configuration + ); + let message = build_propose_change_message( + self.old_leader, + self.new_configuration.voters(), + self.new_configuration.learners(), + self.peers[&1].raft_log.last_index() + 1, + ); + self.dispatch(vec![message]) + } + + /// Checks that the given peers are not in a transition state. + fn assert_not_in_membership_change<'a>(&self, peers: impl IntoIterator) { + for peer in peers.into_iter().map(|id| &self.peers[id]) { + assert!( + !peer.is_in_membership_change(), + "Peer {} should not have been in a membership change.", + peer.id + ); + } + } + + // Checks that the given peers are in a transition state. + fn assert_in_membership_change<'a>(&self, peers: impl IntoIterator) { + for peer in peers.into_iter().map(|id| &self.peers[id]) { + assert!( + peer.is_in_membership_change(), + "Peer {} should have been in a membership change.", + peer.id + ); + } + } + + /// Reads the pending entries to be applied to a raft peer, checks one is of the expected variant, and applies it. Then, it advances the node to that point in the configuration change. + fn expect_apply_membership_change_entry<'a>( + &mut self, + peers: impl IntoIterator, + entry_type: ConfChangeType, + ) -> Result<()> { + for peer in peers { + debug!( + "Advancing peer {}, expecting a {:?} entry.", + peer, entry_type + ); + let peer = self.network.peers.get_mut(peer).unwrap(); + if let Some(entries) = peer.raft_log.next_entries() { + peer.mut_store().wl().append(&entries).unwrap(); + let mut found = false; + for entry in &entries { + if entry.get_entry_type() == EntryType::EntryConfChange { + let conf_change = + protobuf::parse_from_bytes::(entry.get_data())?; + if conf_change.get_change_type() == entry_type { + found = true; + match entry_type { + ConfChangeType::BeginMembershipChange => { + peer.begin_membership_change(&conf_change)? + } + ConfChangeType::FinalizeMembershipChange => { + peer.finalize_membership_change(&conf_change)? 
+ } + ConfChangeType::AddNode => { + peer.add_node(conf_change.get_node_id())? + } + _ => panic!("Unexpected conf change"), + }; + } + } + if found { + peer.raft_log.stable_to(entry.get_index(), entry.get_term()); + peer.raft_log.commit_to(entry.get_index()); + peer.commit_apply(entry.get_index()); + let hs = peer.hard_state(); + peer.mut_store().wl().set_hardstate(hs); + peer.tick(); + break; + } + } + assert!( + found, + "{:?} message not found for peer {}. Got: {:?}", + entry_type, peer.id, entries + ); + } else { + panic!("Didn't have any entries {}", peer.id); + } + } + Ok(()) + } + + /// Reads messages from each peer in a given list, and dispatches their message before moving to the next peer. + /// + /// Expects each peer to have a message. If the message is not defintely sent use `read_and_dispatch_messages_from`. + fn expect_read_and_dispatch_messages_from<'a>( + &mut self, + peers: impl IntoIterator, + ) -> Result<()> { + let peers = peers.into_iter().cloned(); + for (step, peer) in peers.enumerate() { + info!( + "Expecting and dispatching messages from {} at step {}.", + peer, step + ); + let messages = self.peers.get_mut(&peer).unwrap().read_messages(); + trace!("{} sends messages: {:?}", peer, messages); + assert!( + !messages.is_empty(), + "Expected peer {} to have messages at step {}.", + peer, + step + ); + self.dispatch(messages)?; + } + Ok(()) + } + + /// Simulate a power cycle in the given nodes. + /// + /// This means that the MemStorage is kept, but nothing else. 
+ fn power_cycle<'a>( + &mut self, + peers: impl IntoIterator, + snapshot: impl Into>, + ) { + let peers = peers.into_iter().cloned(); + let snapshot = snapshot.into(); + for id in peers { + debug!("Power cycling {}.", id); + let applied = self.peers[&id].raft_log.applied; + let mut peer = self.peers.remove(&id).expect("Peer did not exist."); + let store = peer.mut_store().clone(); + + let mut peer = Raft::new( + &Config { + id, + tag: format!("{}", id), + applied: applied, + ..Default::default() + }, + store, + ) + .expect("Could not create new Raft"); + + if let Some(ref snapshot) = snapshot { + peer.restore(snapshot.clone()); + }; + self.peers.insert(id, peer.into()); + } + } + + // Verify there is a transition entry at the given index of the given variant. + fn assert_membership_change_entry_at<'a>( + &self, + peers: impl IntoIterator, + index: u64, + entry_type: ConfChangeType, + ) { + let peers = peers.into_iter().cloned(); + for peer in peers { + let entry = &self.peers[&peer] + .raft_log + .slice(index, index + 1, NO_LIMIT) + .unwrap()[0]; + assert_eq!(entry.get_entry_type(), EntryType::EntryConfChange); + let conf_change = protobuf::parse_from_bytes::(entry.get_data()).unwrap(); + assert_eq!(conf_change.get_change_type(), entry_type); + } + } + + fn assert_can_apply_transition_entry_at_index<'a>( + &mut self, + peers: impl IntoIterator, + index: u64, + entry_type: ConfChangeType, + ) { + let peers = peers.into_iter().collect::>(); + self.expect_apply_membership_change_entry(peers.clone(), entry_type) + .unwrap(); + self.assert_membership_change_entry_at(peers, index, entry_type) + } +} + +fn conf_state<'a>( + voters: impl IntoIterator, + learners: impl IntoIterator, +) -> ConfState { + let voters = voters.into_iter().cloned().collect::>(); + let learners = learners.into_iter().cloned().collect::>(); + let mut conf_state = ConfState::new(); + conf_state.set_nodes(voters); + conf_state.set_learners(learners); + conf_state +} + +fn begin_conf_change<'a>( + 
voters: impl IntoIterator, + learners: impl IntoIterator, + index: u64, +) -> ConfChange { + let conf_state = conf_state(voters, learners); + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::BeginMembershipChange); + conf_change.set_configuration(conf_state); + conf_change.set_start_index(index); + conf_change +} + +fn finalize_conf_change<'a>() -> ConfChange { + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::FinalizeMembershipChange); + conf_change +} + +fn begin_entry<'a>( + voters: impl IntoIterator, + learners: impl IntoIterator, + index: u64, +) -> Entry { + let conf_change = begin_conf_change(voters, learners, index); + let data = protobuf::Message::write_to_bytes(&conf_change).unwrap(); + let mut entry = Entry::new(); + entry.set_entry_type(EntryType::EntryConfChange); + entry.set_data(data); + entry.set_index(index); + entry +} + +fn build_propose_change_message<'a>( + recipient: u64, + voters: impl IntoIterator, + learners: impl IntoIterator, + index: u64, +) -> Message { + let begin_entry = begin_entry(voters, learners, index); + let mut message = Message::new(); + message.set_to(recipient); + message.set_msg_type(MessageType::MsgPropose); + message.set_index(index); + message.set_entries(RepeatedField::from_vec(vec![begin_entry])); + message +} + +fn build_propose_add_node_message(recipient: u64, added_id: u64, index: u64) -> Message { + let add_nodes_entry = { + let mut conf_change = ConfChange::new(); + conf_change.set_change_type(ConfChangeType::AddNode); + conf_change.set_node_id(added_id); + let data = protobuf::Message::write_to_bytes(&conf_change).unwrap(); + let mut entry = Entry::new(); + entry.set_entry_type(EntryType::EntryConfChange); + entry.set_data(data); + entry.set_index(index); + entry + }; + let mut message = Message::new(); + message.set_to(recipient); + message.set_msg_type(MessageType::MsgPropose); + message.set_index(index); + 
message.set_entries(RepeatedField::from_vec(vec![add_nodes_entry])); + message +} diff --git a/tests/integration_cases/test_raft.rs b/tests/integration_cases/test_raft.rs index ab2f85cd3..4b31e91d6 100644 --- a/tests/integration_cases/test_raft.rs +++ b/tests/integration_cases/test_raft.rs @@ -91,7 +91,7 @@ fn next_ents(r: &mut Raft, s: &MemStorage) -> Vec { r.raft_log.stable_to(last_idx, last_term); let ents = r.raft_log.next_entries(); let committed = r.raft_log.committed; - r.raft_log.applied_to(committed); + r.commit_apply(committed); ents.unwrap_or_else(Vec::new) } @@ -2741,7 +2741,7 @@ fn test_restore() { ); assert_eq!( sm.prs().voter_ids(), - &s.get_metadata() + s.get_metadata() .get_conf_state() .get_nodes() .iter() @@ -2849,7 +2849,7 @@ fn test_slow_node_restore() { cs.set_nodes(nt.peers[&1].prs().voter_ids().iter().cloned().collect()); nt.storage[&1] .wl() - .create_snapshot(nt.peers[&1].raft_log.applied, Some(cs), vec![]) + .create_snapshot(nt.peers[&1].raft_log.applied, Some(cs), None, vec![]) .expect(""); nt.storage[&1] .wl() @@ -2950,18 +2950,21 @@ fn test_new_leader_pending_config() { // test_add_node tests that add_node could update nodes correctly. #[test] -fn test_add_node() { +fn test_add_node() -> Result<()> { setup_for_test(); + let mut r = new_test_raft(1, vec![1], 10, 1, new_storage()); - r.add_node(2); + r.add_node(2)?; assert_eq!( r.prs().voter_ids(), - &vec![1, 2].into_iter().collect::>() + vec![1, 2].into_iter().collect::>() ); + + Ok(()) } #[test] -fn test_add_node_check_quorum() { +fn test_add_node_check_quorum() -> Result<()> { setup_for_test(); let mut r = new_test_raft(1, vec![1], 10, 1, new_storage()); r.check_quorum = true; @@ -2973,7 +2976,7 @@ fn test_add_node_check_quorum() { r.tick(); } - r.add_node(2); + r.add_node(2)?; // This tick will reach electionTimeout, which triggers a quorum check. 
r.tick(); @@ -2988,20 +2991,24 @@ fn test_add_node_check_quorum() { } assert_eq!(r.state, StateRole::Follower); + + Ok(()) } // test_remove_node tests that removeNode could update pendingConf, nodes and // and removed list correctly. #[test] -fn test_remove_node() { +fn test_remove_node() -> Result<()> { setup_for_test(); + let mut r = new_test_raft(1, vec![1, 2], 10, 1, new_storage()); - r.remove_node(2); + r.remove_node(2)?; assert_eq!(r.prs().voter_ids().iter().next().unwrap(), &1); - // remove all nodes from cluster - r.remove_node(1); + r.remove_node(1)?; assert!(r.prs().voter_ids().is_empty()); + + Ok(()) } #[test] @@ -3033,7 +3040,7 @@ fn test_raft_nodes() { let r = new_test_raft(1, ids, 10, 1, new_storage()); let voter_ids = r.prs().voter_ids(); let wids = wids.into_iter().collect::>(); - if voter_ids != &wids { + if voter_ids != wids { panic!("#{}: nodes = {:?}, want {:?}", i, voter_ids, wids); } } @@ -3067,8 +3074,9 @@ fn test_campaign_while_leader_with_pre_vote(pre_vote: bool) { // test_commit_after_remove_node verifies that pending commands can become // committed when a config change reduces the quorum requirements. #[test] -fn test_commit_after_remove_node() { +fn test_commit_after_remove_node() -> Result<()> { setup_for_test(); + // Create a cluster with two nodes. let s = new_storage(); let mut r = new_test_raft(1, vec![1, 2], 5, 1, s.clone()); @@ -3108,11 +3116,13 @@ fn test_commit_after_remove_node() { // Apply the config change. This reduces quorum requirements so the // pending command can now commit. 
- r.remove_node(2); + r.remove_node(2)?; let ents = next_ents(&mut r, &s); assert_eq!(ents.len(), 1); assert_eq!(ents[0].get_entry_type(), EntryType::EntryNormal); assert_eq!(ents[0].get_data(), b"hello"); + + Ok(()) } // test_leader_transfer_to_uptodate_node verifies transferring should succeed @@ -3229,7 +3239,7 @@ fn test_leader_transfer_after_snapshot() { cs.set_nodes(nt.peers[&1].prs().voter_ids().iter().cloned().collect()); nt.storage[&1] .wl() - .create_snapshot(nt.peers[&1].raft_log.applied, Some(cs), vec![]) + .create_snapshot(nt.peers[&1].raft_log.applied, Some(cs), None, vec![]) .expect(""); nt.storage[&1] .wl() @@ -3363,7 +3373,7 @@ fn test_leader_transfer_receive_higher_term_vote() { } #[test] -fn test_leader_transfer_remove_node() { +fn test_leader_transfer_remove_node() -> Result<()> { setup_for_test(); let mut nt = Network::new(vec![None, None, None]); nt.send(vec![new_message(1, 1, MessageType::MsgHup, 0)]); @@ -3374,9 +3384,11 @@ fn test_leader_transfer_remove_node() { nt.send(vec![new_message(3, 1, MessageType::MsgTransferLeader, 0)]); assert_eq!(nt.peers[&1].lead_transferee.unwrap(), 3); - nt.peers.get_mut(&1).unwrap().remove_node(3); + nt.peers.get_mut(&1).unwrap().remove_node(3)?; check_leader_transfer_state(&nt.peers[&1], StateRole::Leader, 1); + + Ok(()) } // test_leader_transfer_back verifies leadership can transfer @@ -3575,8 +3587,9 @@ fn test_learner_election_timeout() { // TestLearnerPromotion verifies that the leaner should not election until // it is promoted to a normal peer. #[test] -fn test_learner_promotion() { +fn test_learner_promotion() -> Result<()> { setup_for_test(); + let mut n1 = new_test_learner_raft(1, vec![1], vec![2], 10, 1, new_storage()); n1.become_follower(1, INVALID_ID); @@ -3603,8 +3616,8 @@ fn test_learner_promotion() { network.send(vec![heart_beat.clone()]); // Promote n2 from learner to follower. 
- network.peers.get_mut(&1).unwrap().add_node(2); - network.peers.get_mut(&2).unwrap().add_node(2); + network.peers.get_mut(&1).unwrap().add_node(2)?; + network.peers.get_mut(&2).unwrap().add_node(2)?; assert_eq!(network.peers[&2].state, StateRole::Follower); assert!(!network.peers[&2].is_learner); @@ -3623,6 +3636,8 @@ fn test_learner_promotion() { network.send(vec![heart_beat]); assert_eq!(network.peers[&1].state, StateRole::Follower); assert_eq!(network.peers[&2].state, StateRole::Leader); + + Ok(()) } // TestLearnerLogReplication tests that a learner can receive entries from the leader. @@ -3746,7 +3761,7 @@ fn test_learner_receive_snapshot() { n1.restore(s); let committed = n1.raft_log.committed; - n1.raft_log.applied_to(committed); + n1.commit_apply(committed); let mut network = Network::new(vec![Some(n1), Some(n2)]); @@ -3774,44 +3789,52 @@ fn test_learner_receive_snapshot() { // TestAddLearner tests that addLearner could update nodes correctly. #[test] -fn test_add_learner() { +fn test_add_learner() -> Result<()> { setup_for_test(); let mut n1 = new_test_raft(1, vec![1], 10, 1, new_storage()); - n1.add_learner(2); + n1.add_learner(2)?; assert_eq!(*n1.prs().learner_ids().iter().next().unwrap(), 2); assert!(n1.prs().learner_ids().contains(&2)); + + Ok(()) } // Ensure when add_voter is called on a peers own ID that it will be promoted. // When the action fails, ensure it doesn't mutate the raft state. #[test] -fn test_add_voter_peer_promotes_self_sets_is_learner() { +fn test_add_voter_peer_promotes_self_sets_is_learner() -> Result<()> { setup_for_test(); + let mut n1 = new_test_raft(1, vec![1], 10, 1, new_storage()); // Node is already voter. 
- n1.add_learner(1); + n1.add_learner(1).ok(); assert_eq!(n1.is_learner, false); assert!(n1.prs().voter_ids().contains(&1)); - n1.remove_node(1); - n1.add_learner(1); + n1.remove_node(1)?; + n1.add_learner(1)?; assert_eq!(n1.is_learner, true); assert!(n1.prs().learner_ids().contains(&1)); + + Ok(()) } // TestRemoveLearner tests that removeNode could update nodes and // and removed list correctly. #[test] -fn test_remove_learner() { +fn test_remove_learner() -> Result<()> { setup_for_test(); + let mut n1 = new_test_learner_raft(1, vec![1], vec![2], 10, 1, new_storage()); - n1.remove_node(2); + n1.remove_node(2)?; assert_eq!(n1.prs().voter_ids().iter().next().unwrap(), &1); assert!(n1.prs().learner_ids().is_empty()); - n1.remove_node(1); + n1.remove_node(1)?; assert!(n1.prs().voter_ids().is_empty()); assert_eq!(n1.prs().learner_ids().len(), 0); + + Ok(()) } // simulate rolling update a cluster for Pre-Vote. cluster has 3 nodes [n1, n2, n3]. @@ -3925,8 +3948,9 @@ fn test_prevote_migration_with_free_stuck_pre_candidate() { } #[test] -fn test_learner_respond_vote() { +fn test_learner_respond_vote() -> Result<()> { setup_for_test(); + let mut n1 = new_test_learner_raft(1, vec![1, 2], vec![3], 10, 1, new_storage()); n1.become_follower(1, INVALID_ID); n1.reset_randomized_election_timeout(); @@ -3948,9 +3972,11 @@ fn test_learner_respond_vote() { assert_eq!(network.peers[&1].state, StateRole::Candidate); // After promote 3 to voter, election should success. 
- network.peers.get_mut(&1).unwrap().add_node(3); + network.peers.get_mut(&1).unwrap().add_node(3)?; do_campaign(&mut network); assert_eq!(network.peers[&1].state, StateRole::Leader); + + Ok(()) } #[test] diff --git a/tests/integration_cases/test_raft_paper.rs b/tests/integration_cases/test_raft_paper.rs index c68b8d6b7..fd309260b 100644 --- a/tests/integration_cases/test_raft_paper.rs +++ b/tests/integration_cases/test_raft_paper.rs @@ -48,7 +48,7 @@ fn commit_noop_entry(r: &mut Interface, s: &MemStorage) { .append(r.raft_log.unstable_entries().unwrap_or(&[])) .expect(""); let committed = r.raft_log.committed; - r.raft_log.applied_to(committed); + r.commit_apply(committed); let (last_index, last_term) = (r.raft_log.last_index(), r.raft_log.last_term()); r.raft_log.stable_to(last_index, last_term); } diff --git a/tests/integration_cases/test_raw_node.rs b/tests/integration_cases/test_raw_node.rs index f75c251e0..abe8503c8 100644 --- a/tests/integration_cases/test_raw_node.rs +++ b/tests/integration_cases/test_raw_node.rs @@ -246,7 +246,7 @@ fn test_raw_node_propose_add_duplicate_node() { for e in rd.committed_entries.as_ref().unwrap() { if e.get_entry_type() == EntryType::EntryConfChange { let conf_change = protobuf::parse_from_bytes(e.get_data()).unwrap(); - raw_node.apply_conf_change(&conf_change); + raw_node.apply_conf_change(&conf_change).ok(); } } raw_node.advance(rd); @@ -274,7 +274,7 @@ fn test_raw_node_propose_add_duplicate_node() { } #[test] -fn test_raw_node_propose_add_learner_node() { +fn test_raw_node_propose_add_learner_node() -> Result<()> { setup_for_test(); let s = new_storage(); let mut raw_node = new_raw_node(1, vec![], 10, 1, s.clone(), vec![new_peer(1)]); @@ -307,9 +307,11 @@ fn test_raw_node_propose_add_learner_node() { let e = &rd.committed_entries.as_ref().unwrap()[0]; let conf_change = protobuf::parse_from_bytes(e.get_data()).unwrap(); - let conf_state = raw_node.apply_conf_change(&conf_change); + let conf_state = 
raw_node.apply_conf_change(&conf_change)?; assert_eq!(conf_state.nodes, vec![1]); assert_eq!(conf_state.learners, vec![2]); + + Ok(()) } // test_raw_node_read_index ensures that RawNode.read_index sends the MsgReadIndex message diff --git a/tests/test_util/mod.rs b/tests/test_util/mod.rs index 349fabcba..1e21cf66e 100644 --- a/tests/test_util/mod.rs +++ b/tests/test_util/mod.rs @@ -77,6 +77,12 @@ pub struct Interface { pub raft: Option>, } +impl From> for Interface { + fn from(value: Raft) -> Self { + Interface::new(value) + } +} + impl Interface { pub fn new(r: Raft) -> Interface { Interface { raft: Some(r) } @@ -153,6 +159,7 @@ pub fn new_test_raft_with_prevote( ) -> Interface { let mut config = new_test_config(id, peers, election, heartbeat); config.pre_vote = pre_vote; + config.tag = format!("{}", id); new_test_raft_with_config(&config, storage) } @@ -221,7 +228,7 @@ struct Connem { to: u64, } -#[allow(declare_interior_mutable_const)] +#[allow(clippy::declare_interior_mutable_const)] pub const NOP_STEPPER: Option = Some(Interface { raft: None }); #[derive(Default)] @@ -279,8 +286,8 @@ impl Network { self.ignorem.insert(t, true); } - pub fn filter(&self, mut msgs: Vec) -> Vec { - msgs.drain(..) + pub fn filter(&self, msgs: impl IntoIterator) -> Vec { + msgs.into_iter() .filter(|m| { if self .ignorem @@ -305,6 +312,13 @@ impl Network { .collect() } + pub fn read_messages(&mut self) -> Vec { + self.peers + .iter_mut() + .flat_map(|(_peer, progress)| progress.read_messages()) + .collect() + } + pub fn send(&mut self, msgs: Vec) { let mut msgs = msgs; while !msgs.is_empty() { @@ -321,6 +335,18 @@ impl Network { } } + /// Dispatches the given messages to the appropriate peers. + /// + /// Unlike `send` this does not gather and send any responses. It also does not ignore errors. 
+ pub fn dispatch(&mut self, messages: impl IntoIterator) -> Result<()> { + for message in self.filter(messages) { + let to = message.get_to(); + let peer = self.peers.get_mut(&to).unwrap(); + peer.step(message)?; + } + Ok(()) + } + pub fn drop(&mut self, from: u64, to: u64, perc: f64) { self.dropm.insert(Connem { from, to }, perc); }